From acc67a6f1c7b1983716eebae650aa52e2276e91c Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 4 Jul 2024 14:44:50 +0800
Subject: [PATCH 001/187] =?UTF-8?q?=F0=9F=A4=96=20update=20workflow?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/poetry-publish.yml | 3 +--
 .github/workflows/pytest-ci.yml      | 2 +-
 .pre-commit-config.yaml              | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/poetry-publish.yml b/.github/workflows/poetry-publish.yml
index 64172804..d505b8d9 100644
--- a/.github/workflows/poetry-publish.yml
+++ b/.github/workflows/poetry-publish.yml
@@ -27,6 +27,5 @@ jobs:
       - name: Publish python poetry package
         uses: JRubics/poetry-publish@v2.0
         with:
-          python_version: "3.11"
+          poetry_install_options: "--sync"
           pypi_token: ${{ secrets.PYPI_API_TOKEN }}
-          ignore_dev_requirements: "yes"
diff --git a/.github/workflows/pytest-ci.yml b/.github/workflows/pytest-ci.yml
index 83b4d1f9..a5bfbb4b 100644
--- a/.github/workflows/pytest-ci.yml
+++ b/.github/workflows/pytest-ci.yml
@@ -31,7 +31,7 @@ jobs:
 
       - name: Install test dependencies
         run: |
-          poetry install --with test
+          poetry install --with test --sync
 
       - name: Run pytest
         uses: pavelzw/pytest-action@v2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 910bd466..3e03697b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -46,7 +46,7 @@ repos:
         args: [--pytest-test-first]
       - id: requirements-txt-fixer
       - id: pretty-format-json
-        args: [--autofix]
+        args: [--autofix, --indent=2]  # single argv token; "--indent 2" is parsed as a filename
       - id: no-commit-to-branch
 
   - repo: https://github.com/python-poetry/poetry

From 03cf5fad155bce2af3fd1c4150f0cd6662c22753 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 4 Jul 2024 20:01:01 +0800
Subject: [PATCH 002/187] =?UTF-8?q?=E2=9C=A8=20MetaNeuron=20supports=20ANN?=
 =?UTF-8?q?=20runtime=20mode?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py  | 323 ++++++++++++++++--------------
 paibox/components/neuron/utils.py |  70 +++++--
 paibox/mixin.py                   |   9 +-
 paibox/types.py                   |  17 +-
 4 files changed, 250 insertions(+), 169 deletions(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 29d5bb06..51899572 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -1,7 +1,13 @@
+import sys
 import warnings
 from collections.abc import Iterable
 from typing import Any, Literal, NoReturn, Optional, Union
 
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+
 import numpy as np
 from numpy.typing import NDArray
 from paicorelib import (
@@ -13,13 +19,25 @@
     SIM,
     TM,
     HwConfig,
-    MaxPoolingEnable,
+    InputWidthFormat,
     SpikeWidthFormat,
+    SNNModeEnable,
+    CoreMode,
+    get_core_mode,
 )
 
 from paibox.base import NeuDyn
 from paibox.exceptions import PAIBoxWarning, ShapeError
-from paibox.types import LeakVType, Shape, SpikeType, VoltageType
+from paibox.types import (
+    NEUOUT_U8_DTYPE,
+    SPIKE_DTYPE,
+    VOLTAGE_DTYPE,
+    LeakVType,
+    NeuOutType,
+    Shape,
+    SpikeType,
+    VoltageType,
+)
 from paibox.utils import (
     arg_check_non_neg,
     arg_check_non_pos,
@@ -28,16 +46,31 @@
     shape2num,
 )
 
-from .utils import NEG_THRES_MIN, _is_leak_v_overflow, _mask, vjt_overflow
+from .utils import (
+    BIT_TRUNCATE_MAX,
+    NEG_THRES_MIN,
+    _leak_v_check,
+    _mask,
+    vjt_overflow,
+    _input_width_format,
+    _spike_width_format,
+    _get_neu_out_dtype,
+)
 
 __all__ = ["Neuron"]
 
 L = Literal
+NeuOutTruncType: TypeAlias = NDArray[NEUOUT_U8_DTYPE]
 
 
 class MetaNeuron:
     """Meta neuron"""
 
+    input_width: InputWidthFormat
+    spike_width: SpikeWidthFormat
+    snn_en: SNNModeEnable
+    mode: CoreMode
+
     def __init__(
         self,
         shape: Shape,
@@ -53,6 +86,9 @@ def __init__(
         leak_v: Union[int, LeakVType],
         synaptic_integr: SIM,
         bit_truncation: int,
+        input_width: InputWidthFormat,
+        spike_width: SpikeWidthFormat,
+        snn_en: SNNModeEnable,
         overflow_strict: bool,
         keep_shape: bool = False,
     ) -> None:
@@ -62,6 +98,12 @@ def __init__(
         self._shape = as_shape(shape)
         self._n_neuron = shape2num(self._shape)
 
+        self.input_width = input_width
+        self.spike_width = spike_width
+        self.snn_en = snn_en
+        # check whether the mode is valid
+        self.mode = get_core_mode(input_width, spike_width, snn_en)
+
         # DO NOT modify the names of the following variables.
         # They will be exported to the parameter verification model.
         self.reset_mode = reset_mode
@@ -76,6 +118,11 @@ def __init__(
         self.synaptic_integr = synaptic_integr
         self.bit_truncation = bit_truncation  # Unsigned 5-bit
 
+        # Auxiliary attributes or variables.
+        self._thres_mask = _mask(threshold_mask_bits)
+        self.thres_mode = self.init_param(TM.NOT_EXCEEDED)
+        self.overflow_strict = overflow_strict
+
         if isinstance(leak_v, int) or leak_v.size == 1:
             # np.array([x]) is treated as a scalar.
             self.leak_v = int(leak_v)
@@ -89,17 +136,7 @@ def __init__(
                 f"'leak' is either a scalar or have shape (output channels, ), but got ({self._shape[0]},)."
             )
 
-        _is_leak_v_overflow(self.leak_v)
-
-        # TODO These two config below are parameters of CORE.
-        self._spike_width_format: SpikeWidthFormat
-        self._pool_max_en: MaxPoolingEnable
-
-        # Auxiliary attributes or variables.
-        self._thres_mask = _mask(threshold_mask_bits)
-        self.thres_mode = self.init_param(TM.NOT_EXCEEDED).astype(np.uint8)
-        self._v_th_rand = self.init_param(0).astype(np.int32)
-        self.overflow_strict = overflow_strict
+        _leak_v_check(self.leak_v)
 
         if self.synaptic_integr is SIM.MODE_STOCHASTIC:
             warnings.warn(
@@ -121,8 +158,13 @@ def __init__(
                 PAIBoxWarning,
             )
 
+        if bit_truncation > BIT_TRUNCATE_MAX:
+            raise ValueError(
+                f"'bit_truncation' should be less than or equal to {BIT_TRUNCATE_MAX}."
+            )
+
     def _neuronal_charge(
-        self, incoming_v: VoltageType, vjt_pre: VoltageType, strict: bool = False
+        self, incoming_v: VoltageType, vjt_pre: VoltageType
     ) -> VoltageType:
         r"""1. Synaptic integration.
 
@@ -135,13 +177,17 @@ def _neuronal_charge(
                 `vjt` = `vjt_pre` + `_rho_w_ij` * \sum^{N-1}_{i=0} * x_i(t) * w_{i,j}
         """
         if incoming_v.ndim == 2:
-            _v = incoming_v.sum(axis=1, dtype=np.int32)
+            _v = np.sum(incoming_v, axis=1)
         else:
             _v = incoming_v
 
-        v_charged = np.add(vjt_pre, _v, dtype=np.int32)
+        if self.snn_en:
+            v_charged = vjt_pre + _v
+        else:
+            # SNN_EN=0, the previous voltage is unused
+            v_charged = _v
 
-        return vjt_overflow(v_charged, strict)  # Handle with overflow here
+        return vjt_overflow(v_charged, self.overflow_strict)
 
     def _neuronal_leak(self, vjt: VoltageType) -> VoltageType:
         r"""2. Leak integration.
@@ -160,14 +206,17 @@ def _neuronal_leak(self, vjt: VoltageType) -> VoltageType:
 
                 `vjt` = `vjt` + \sgn{`leak_v`}* `_ld` * `_F`
         """
-        if self.leak_direction is LDM.MODE_FORWARD:
-            _ld = np.ones((self._n_neuron,), dtype=np.bool_)
-        else:
-            _ld = np.sign(vjt)
+        if self.snn_en:
+            if self.leak_direction is LDM.MODE_FORWARD:
+                _ld = 1
+            else:
+                _ld = np.sign(vjt)
 
-        v_leaked = np.add(vjt, _ld * self.leak_v, dtype=np.int32)
+            v_leaked = vjt + _ld * self.leak_v
+        else:
+            v_leaked = vjt + self.bias
 
-        return v_leaked
+        return vjt_overflow(v_leaked, self.overflow_strict)
 
     def _neuronal_fire(self, vjt: VoltageType) -> SpikeType:
         r"""3. Threshold comparison.
@@ -188,24 +237,13 @@ def _neuronal_fire(self, vjt: VoltageType) -> SpikeType:
             else
                 `spike` = 0
         """
-        # fixed at 0 since we won't simulate random threshold
-        _v_th_rand = 0 & self._thres_mask
-        self._v_th_rand = self.init_param(_v_th_rand).astype(np.int32)
-
-        if self.neg_thres_mode is NTM.MODE_RESET:
-            _v_th_neg = self.neg_threshold + _v_th_rand
-        else:
-            _v_th_neg = self.neg_threshold
-
-        """Fire"""
         self.thres_mode = np.where(
-            vjt >= self.pos_threshold + _v_th_rand,
+            vjt >= self.pos_threshold,
             TM.EXCEED_POSITIVE,
-            np.where(vjt < -_v_th_neg, TM.EXCEED_NEGATIVE, TM.NOT_EXCEEDED),
-        ).astype(np.uint8)
-
-        spike = np.equal(self.thres_mode, TM.EXCEED_POSITIVE)
+            np.where(vjt + self.neg_threshold < 0, TM.EXCEED_NEGATIVE, TM.NOT_EXCEEDED),
+        )
 
+        spike = self.thres_mode == TM.EXCEED_POSITIVE
         return spike
 
     def _neuronal_reset(self, vjt: VoltageType) -> VoltageType:
@@ -236,48 +274,34 @@ def _neuronal_reset(self, vjt: VoltageType) -> VoltageType:
 
         def _when_exceed_pos() -> VoltageType:
             if self.reset_mode is RM.MODE_NORMAL:
-                return np.full((self._n_neuron,), self.reset_v, dtype=np.int32)
-
+                return np.full_like(vjt, self.reset_v)
             elif self.reset_mode is RM.MODE_LINEAR:
-                return np.subtract(
-                    vjt, self.pos_threshold + self._v_th_rand, dtype=np.int32
-                )
+                return vjt - self.pos_threshold
             else:  # RM.MODE_NONRESET
                 return vjt
 
         def _when_exceed_neg() -> VoltageType:
             if self.neg_thres_mode is NTM.MODE_RESET:
                 if self.reset_mode is RM.MODE_NORMAL:
-                    return np.full((self._n_neuron,), -self.reset_v, dtype=np.int32)
+                    return np.full_like(vjt, -self.reset_v)
                 elif self.reset_mode is RM.MODE_LINEAR:
-                    return np.add(
-                        vjt,
-                        self.neg_threshold + self._v_th_rand,
-                        dtype=np.int32,
-                    )
+                    return vjt + self.neg_threshold
                 else:  # RM.MODE_NONRESET
                     return vjt
-
             else:
-                return np.full((self._n_neuron,), -self.neg_threshold, dtype=np.int32)
+                return np.full_like(vjt, -self.neg_threshold)
 
         # USE "=="!
         v_reset = np.where(
             self.thres_mode == TM.EXCEED_POSITIVE,
             _when_exceed_pos(),
-            np.where(
-                self.thres_mode == TM.EXCEED_NEGATIVE,
-                _when_exceed_neg(),
-                vjt,
-            ),
-        ).astype(np.int32)
-
-        self._aux_post_hook()
+            np.where(self.thres_mode == TM.EXCEED_NEGATIVE, _when_exceed_neg(), vjt),
+        )
 
-        return v_reset
+        return v_reset.astype(VOLTAGE_DTYPE)
 
-    def _relu(self, vj: VoltageType) -> VoltageType:
-        r"""ReLU(ANN mode ONLY)
+    def _bit_truncate(self, vj: VoltageType) -> NeuOutTruncType:
+        r"""Bit Truncation.
 
         If spiking width format is `WIDTH_1BIT`, then
             if `vj` >= `_pos_threshold`, then
@@ -290,66 +314,53 @@ def _relu(self, vj: VoltageType) -> VoltageType:
             else
                 `_yj` = 0
 
-        NOTE: Truncation of membrane potential
-            _bit_truncation   Position of truncation
-                0                  8'd0
-                1               [0], 7'd0
-                2              [1:0], 6'd0
-                X            [X-1:0], {8-X}'d0
-                7              [6:0], 1'd0
-                8                 [7:0]
-               ...                 ...
-                X               [X-1:X-8]
+        NOTE: output under x-bit truncation
+            _bit_truncation  Position of truncation
+                0                   8'd0
+                1                [0], 7'd0
+                2               [1:0], 6'd0
+                X             [X-1:0], {8-X}'d0
+                7               [6:0], 1'd0
+                8                  [7:0]
+               ...                  ...
+                X                [X-1:X-8]
+
+            If the MSB of voltage is greater than the truncation bit, return 8'd255.
         """
 
-        def _when_exceed_pos() -> VoltageType:
-            if self._spike_width_format is SpikeWidthFormat.WIDTH_1BIT:
-                return np.ones((self._n_neuron,), dtype=np.int32)
-
-            if self.bit_truncation >= 8:
-                return np.full(
-                    (self._n_neuron,),
-                    ((vj >> self.bit_truncation) - 8) & ((1 << 8) - 1),
-                    dtype=np.int32,
-                )
-            elif self.bit_truncation > 0:
-                _mask = (1 << self.bit_truncation) - 1
-                _truncated_vj = vj & _mask
-                return np.full(
-                    (self._n_neuron,),
-                    _truncated_vj << (8 - self.bit_truncation),
-                    dtype=np.int32,
-                )
+        def _truncate() -> VoltageType:
+            if (vj >> self.bit_truncation) > 0:  # Saturate truncation
+                return np.full_like(vj, _mask(8))
+            elif self.bit_truncation == 0:
+                return self._vjt0
+            elif self.bit_truncation < 8:
+                return (vj << (8 - self.bit_truncation)) & _mask(8)
             else:
-                return np.zeros((self._n_neuron,), dtype=np.int32)
+                return (vj >> (self.bit_truncation - 8)) & _mask(8)
 
-        y = np.where(
-            vj >= self.pos_threshold,
-            _when_exceed_pos(),
-            np.zeros((self._n_neuron,), dtype=np.int32),
-        ).astype(np.int32)
-
-        return y
+        v_truncated = np.where(
+            self.thres_mode == TM.EXCEED_POSITIVE, _truncate(), self._vjt0
+        )
 
-    def _max_pooling(self, x: np.ndarray) -> None:
-        # TODO
-        pass
+        return v_truncated.astype(NEUOUT_U8_DTYPE)
 
     def _aux_pre_hook(self) -> None:
-        """Pre-hook before the entire activation."""
+        """Pre-hook before the entire update."""
         pass
 
     def _aux_post_hook(self) -> None:
-        """Post-hook after the entire activation."""
-        # Reset the auxiliary threshold mode.
-        self.thres_mode = self.init_param(TM.NOT_EXCEEDED).astype(np.uint8)
+        """Post-hook after the entire update."""
+        # Reset the auxiliary threshold mode
+        self.thres_mode = self.init_param(TM.NOT_EXCEEDED)
 
     def update(
         self, incoming_v: VoltageType, vjt_pre: VoltageType
-    ) -> tuple[SpikeType, VoltageType, NDArray[np.uint8]]:
-        """Update at one time step."""
+    ) -> tuple[Union[SpikeType, NeuOutTruncType], VoltageType]:
+        """Update at one timestep."""
+        self._aux_pre_hook()
+
         # 1. Charge
-        v_charged = self._neuronal_charge(incoming_v, vjt_pre, self.overflow_strict)
+        v_charged = self._neuronal_charge(incoming_v, vjt_pre)
 
         # 2. Leak & fire
         if self.leak_comparison is LCM.LEAK_BEFORE_COMP:
@@ -359,17 +370,38 @@ def update(
             spike = self._neuronal_fire(v_charged)
             v_leaked = self._neuronal_leak(v_charged)
 
-        # Store the intermediate threshold mode & return
-        _debug_thres_mode = self.thres_mode
-
-        # 3. Reset
+        # 3. Reset. Reset is performed in all modes.
         v_reset = self._neuronal_reset(v_leaked)
 
-        return spike, v_reset, _debug_thres_mode
+        if self.spike_width is SpikeWidthFormat.WIDTH_8BIT:
+            # Although the truncated voltage is of type VOLTAGE_DTYPE, its value fits in uint8.
+            # The voltage to truncate is the one before neuronal reset.
+            v_truncated = self._bit_truncate(v_leaked)
+
+        self._aux_post_hook()
+
+        if self.spike_width is SpikeWidthFormat.WIDTH_1BIT:
+            # When output width is 1 bit, bit truncation is not performed.
+            return spike, v_reset
+        else:
+            return v_truncated, v_reset
 
     def init_param(self, param: Any) -> np.ndarray:
         return np.full((self._n_neuron,), param)
 
+    @property
+    def _neu_out_dtype(self) -> type[Union[SPIKE_DTYPE, NEUOUT_U8_DTYPE]]:
+        """dtype of output of neuron."""
+        return _get_neu_out_dtype(self.spike_width)
+
+    @property
+    def _vjt0(self) -> VoltageType:
+        return self.init_param(0).astype(VOLTAGE_DTYPE)
+
+    @property
+    def _neu_out0(self) -> NeuOutType:
+        return self.init_param(0).astype(self._neu_out_dtype)
+
     @property
     def varshape(self) -> tuple[int, ...]:
         return self._shape if self.keep_shape else (self._n_neuron,)
@@ -397,11 +429,14 @@ def __init__(
         leak_integration_mode: Union[L[0, 1], bool, LIM] = LIM.MODE_DETERMINISTIC,
         leak_v: Union[int, LeakVType] = 0,
         synaptic_integration_mode: Union[L[0, 1], bool, SIM] = SIM.MODE_DETERMINISTIC,
-        bit_truncation: int = 0,
+        bit_truncation: int = 8,
         *,
         delay: int = 1,
         tick_wait_start: int = 1,
         tick_wait_end: int = 0,
+        input_width: Union[L[1, 8], InputWidthFormat] = InputWidthFormat.WIDTH_1BIT,
+        spike_width: Union[L[1, 8], SpikeWidthFormat] = SpikeWidthFormat.WIDTH_1BIT,
+        snn_en: bool = True,
         unrolling_factor: int = 1,
         overflow_strict: bool = False,
         keep_shape: bool = True,
@@ -411,6 +446,12 @@ def __init__(
             # XXX *(-1) if passing a negative threshold > 0
             neg_threshold = (-1) * neg_threshold
 
+        if bit_truncation > BIT_TRUNCATE_MAX:
+            raise ValueError(
+                f"'bit_truncation' should be less than or equal to {BIT_TRUNCATE_MAX}, "
+                f"but got {bit_truncation}."
+            )
+
         super().__init__(
             shape,
             reset_mode,
@@ -425,33 +466,26 @@ def __init__(
             leak_v,
             SIM(synaptic_integration_mode),
             arg_check_non_neg(bit_truncation, "bit of tuncation"),
+            _input_width_format(input_width),
+            _spike_width_format(spike_width),
+            SNNModeEnable(snn_en),
             overflow_strict,
             keep_shape,
         )
         super(MetaNeuron, self).__init__(name)
 
         """Stateful attributes. Vector."""
-        # Initial vjt is fixed at 0.
-        self.set_memory("_vjt", self.init_param(0).astype(np.int32))
-        self.set_memory("_inner_spike", self.init_param(0).astype(np.bool_))
-
-        # Not supported for attributes in ANN mode
-        self.set_memory("vj", self.init_param(0).astype(np.int32))
-        self.set_memory("y", self.init_param(0).astype(np.int32))
-
-        """Auxiliary internal stateful attributes for debugging"""
-        self.set_memory(
-            "_debug_thres_mode", self.init_param(TM.NOT_EXCEEDED).astype(np.uint8)
-        )
-
-        # Delay registers
+        self.set_memory("_vjt", self._vjt0)  # Initial vjt is fixed at 0.
+        self.set_memory("_neu_out", self._neu_out0)
         self.set_memory(
             "delay_registers",
             np.zeros(
-                (HwConfig.N_TIMESLOT_MAX,) + self._inner_spike.shape, dtype=np.bool_
+                (HwConfig.N_TIMESLOT_MAX,) + self._neu_out.shape,
+                dtype=self._neu_out.dtype,
             ),
         )
 
+        """Auxiliary internal stateful attributes for debugging"""
         self._delay = arg_check_pos(delay, "'delay'")
         self._tws = arg_check_non_neg(tick_wait_start, "'tick_wait_start'")
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
@@ -462,31 +496,30 @@ def __len__(self) -> int:
 
     def __call__(
         self, x: Optional[np.ndarray] = None, *args, **kwargs
-    ) -> Optional[SpikeType]:
+    ) -> Optional[NeuOutType]:
         return self.update(x, *args, **kwargs)
 
     def update(
         self, x: Optional[np.ndarray] = None, *args, **kwargs
-    ) -> Optional[SpikeType]:
+    ) -> Optional[NeuOutType]:
         # Priority order is a must.
         # The neuron doesn't work if `tws = 0` & done working
         # until `t - tws + 1 > twe` under the condition `twe > 0`.
         if not self.is_working():
-            self._inner_spike = self.init_param(0).astype(np.bool_)
+            self._neu_out.fill(0)
             return None
 
-        # The neuron is going to work.
         if x is None:
             x = self.sum_inputs()
+        else:
+            x = np.atleast_1d(x)
 
-        self._inner_spike, self._vjt, self._debug_thres_mode = super().update(
-            x, self._vjt
-        )
+        self._neu_out, self._vjt = super().update(x, self._vjt)
 
         idx = (self.timestamp + self.delay_relative - 1) % HwConfig.N_TIMESLOT_MAX
-        self.delay_registers[idx] = self._inner_spike.copy()
+        self.delay_registers[idx] = self._neu_out.copy()
 
-        return self._inner_spike
+        return self._neu_out
 
     def reset_state(self, *args, **kwargs) -> None:
         self.reset_memory()  # Call reset of `StatusMemory`.
@@ -583,16 +616,16 @@ def num_out(self) -> int:
         return self._n_neuron
 
     @property
-    def output(self) -> SpikeType:
-        return self.delay_registers
+    def output(self) -> NeuOutType:
+        return self._neu_out
 
     @property
-    def spike(self) -> SpikeType:
-        return self._inner_spike
+    def spike(self) -> NeuOutType:
+        return self._neu_out
 
     @property
-    def feature_map(self) -> SpikeType:
-        return self._inner_spike.reshape(self.varshape)
+    def feature_map(self) -> NeuOutType:
+        return self._neu_out.reshape(self.varshape)
 
     @property
     def voltage(self) -> VoltageType:
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index d661dd2c..055c03f8 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -1,14 +1,27 @@
 import warnings
-from typing import Union
+from typing import Literal, Union
 
 import numpy as np
+from paicorelib import InputWidthFormat, SpikeWidthFormat
 from paicorelib.framelib.utils import _mask
-from paicorelib.ram_model import LEAK_V_BIT_MAX, LEAK_V_MAX, LEAK_V_MIN
+from paicorelib.ram_model import (
+    BIT_TRUNCATE_MAX,
+    LEAK_V_BIT_MAX,
+    LEAK_V_MAX,
+    LEAK_V_MIN,
+)
 from paicorelib.ram_model import NEG_THRES_MAX as NEG_THRES_UNSIGNED_MAX
 from paicorelib.ram_model import VJT_MAX, VJT_MIN, VJT_PRE_BIT_MAX
 
 from paibox.exceptions import FunctionalError, PAIBoxWarning
-from paibox.types import LeakVType, VoltageType
+from paibox.types import (
+    LeakVType,
+    NEUOUT_U8_DTYPE,
+    SPIKE_DTYPE,
+    VoltageType,
+    VOLTAGE_DTYPE,
+)
+
 
 NEG_THRES_MIN = -NEG_THRES_UNSIGNED_MAX
 
@@ -22,8 +35,8 @@
 
 
 def _is_vjt_overflow(vjt: VoltageType, strict: bool = False) -> bool:
-    # NOTE: In most cases, membrane potential overflow won't occur,
-    # otherwise the result is incorrect.
+    # NOTE: In most cases, membrane potential overflow won't occur, otherwise the result
+    # may be incorrect.
     if np.any(vjt > VJT_MAX) or np.any(vjt < VJT_MIN):
         if strict:
             raise FunctionalError(VJT_OVERFLOW_TEXT)
@@ -51,18 +64,45 @@ def vjt_overflow(vjt: VoltageType, strict: bool = False) -> VoltageType:
             vjt + VJT_RANGE_LIMIT,
             vjt,
         ),
-    ).astype(np.int32)
+    ).astype(VOLTAGE_DTYPE)
 
 
-def _is_leak_v_overflow(leak_v: Union[int, LeakVType], strict: bool = True) -> None:
+def _leak_v_check(leak_v: Union[int, LeakVType]) -> None:
     if isinstance(leak_v, int):
         if leak_v > LEAK_V_MAX or leak_v < LEAK_V_MIN:
-            if strict:
-                raise FunctionalError(LEAK_V_OVERFLOW_TEXT)
-            else:
-                warnings.warn(LEAK_V_OVERFLOW_TEXT, PAIBoxWarning)
-    elif np.any(leak_v > LEAK_V_MAX) or np.any(leak_v < LEAK_V_MIN):
-        if strict:
             raise FunctionalError(LEAK_V_OVERFLOW_TEXT)
-        else:
-            warnings.warn(LEAK_V_OVERFLOW_TEXT, PAIBoxWarning)
+
+    elif np.any(leak_v > LEAK_V_MAX) or np.any(leak_v < LEAK_V_MIN):
+        raise FunctionalError(LEAK_V_OVERFLOW_TEXT)
+
+
+L = Literal
+
+
+def _input_width_format(iwf: Union[L[1, 8], InputWidthFormat]) -> InputWidthFormat:
+    if isinstance(iwf, InputWidthFormat):
+        return iwf
+
+    if iwf == 1:
+        return InputWidthFormat.WIDTH_1BIT
+    else:
+        return InputWidthFormat.WIDTH_8BIT
+
+
+def _spike_width_format(swf: Union[L[1, 8], SpikeWidthFormat]) -> SpikeWidthFormat:
+    if isinstance(swf, SpikeWidthFormat):
+        return swf
+
+    if swf == 1:
+        return SpikeWidthFormat.WIDTH_1BIT
+    else:
+        return SpikeWidthFormat.WIDTH_8BIT
+
+
+def _get_neu_out_dtype(
+    swf: SpikeWidthFormat,
+) -> type[Union[SPIKE_DTYPE, NEUOUT_U8_DTYPE]]:
+    if swf is SpikeWidthFormat.WIDTH_1BIT:
+        return SPIKE_DTYPE
+    else:
+        return NEUOUT_U8_DTYPE
diff --git a/paibox/mixin.py b/paibox/mixin.py
index 95c5f5cf..d96f921e 100644
--- a/paibox/mixin.py
+++ b/paibox/mixin.py
@@ -10,7 +10,7 @@
 from .exceptions import RegisterError
 from .naming import get_unique_name
 from .node import NodeDict
-from .types import VoltageType
+from .types import VOLTAGE_DTYPE, VoltageType
 
 if typing.TYPE_CHECKING:
     from paibox.components import FullConnectedSyn
@@ -147,13 +147,12 @@ def unregister_master(self, key: str) -> Optional["FullConnectedSyn"]:
     def get_master_node(self, key: str) -> Optional[Any]:
         return self.master_nodes.get(key, None)
 
-    def sum_inputs(self, *, init: VoltageType = 0, **kwargs) -> VoltageType:  # type: ignore
-        # TODO Out is a np.ndarray right now, but it may be more than one type.
-        output = init
+    def sum_inputs(self, *args, **kwargs) -> VoltageType:
+        output = 0
         for node in self.master_nodes.values():
             output += node.output.copy()
 
-        return np.array(output).astype(np.int32)
+        return np.asarray(output, dtype=VOLTAGE_DTYPE)
 
 
 class TimeRelatedNode(MixIn):
diff --git a/paibox/types.py b/paibox/types.py
index 37cdea03..82020805 100644
--- a/paibox/types.py
+++ b/paibox/types.py
@@ -17,8 +17,17 @@
 DataArrayType = TypeVar(
     "DataArrayType", int, np.bool_, np.integer, list[int], tuple[int, ...], np.ndarray
 )
-LeakVType: TypeAlias = NDArray[np.int32]
-SpikeType: TypeAlias = NDArray[np.bool_]
-SynOutType: TypeAlias = NDArray[np.int32]
-VoltageType: TypeAlias = NDArray[np.int32]
+
+LEAK_V_DTYPE = np.int32
+SPIKE_DTYPE = np.bool_
+VOLTAGE_DTYPE = np.int32
+NEUOUT_SPIKE_DTYPE = np.bool_
+NEUOUT_U8_DTYPE = np.uint8
+NEUOUT_DTYPE = Union[NEUOUT_SPIKE_DTYPE, NEUOUT_U8_DTYPE]
+
+LeakVType: TypeAlias = NDArray[LEAK_V_DTYPE]
+SpikeType: TypeAlias = NDArray[SPIKE_DTYPE]
+SynOutType: TypeAlias = NDArray[VOLTAGE_DTYPE]
+VoltageType: TypeAlias = NDArray[VOLTAGE_DTYPE]
+NeuOutType: TypeAlias = NDArray[NEUOUT_DTYPE]
 WeightType: TypeAlias = NDArray[Union[np.bool_, np.int8]]

From 2dfbd910b2cc5999a43e646b5712b5a4e284215e Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 4 Jul 2024 20:01:55 +0800
Subject: [PATCH 003/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20neuron?=
 =?UTF-8?q?=20in=20all=20runtime=20modes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/neuron/test_neurons.py | 185 +++++++++++++++++++-----
 1 file changed, 151 insertions(+), 34 deletions(-)

diff --git a/tests/components/neuron/test_neurons.py b/tests/components/neuron/test_neurons.py
index 35281c56..5db42544 100644
--- a/tests/components/neuron/test_neurons.py
+++ b/tests/components/neuron/test_neurons.py
@@ -1,14 +1,17 @@
 import json
 from copy import copy
+from typing import Any, Literal
 
 import numpy as np
+from numpy.typing import NDArray
 import pytest
-from paicorelib import LCM, LDM, LIM, NTM, RM, SIM, TM, NeuronAttrs
+from paicorelib import CoreMode, LCM, LDM, LIM, NTM, RM, SIM, TM, NeuronAttrs
 
 import paibox as pb
 from paibox.components import Neuron
 from paibox.components.neuron.utils import VJT_MAX, VJT_MIN
 from paibox.exceptions import ShapeError
+from paibox.types import NEUOUT_U8_DTYPE, VoltageType
 from paibox.utils import as_shape, shape2num
 
 
@@ -54,6 +57,29 @@ def test_NeuronParams_check():
     with pytest.raises(ShapeError):
         n4 = pb.LIF((10, 20), 1, bias=np.ones((100,)))
 
+    # If CoreMode specifies all configurations, there will be no invalid situations.
+    if len(CoreMode) < 8:
+        with pytest.raises(ValueError):
+            n5 = pb.LIF((100,), 10, input_width=8, spike_width=8, snn_en=True)
+
+
+L = Literal
+
+
+def _reg_kwds(iw: L[1, 8], sw: L[1, 8], snn_en: L[0, 1]) -> dict[str, Any]:
+    return {"input_width": iw, "spike_width": sw, "snn_en": bool(snn_en)}
+
+
+_reg000_kwds = _reg_kwds(1, 1, 0)
+_reg001_kwds = _reg_kwds(1, 1, 1)
+_reg010_kwds = _reg_kwds(1, 8, 0)
+_reg011_kwds = _reg_kwds(1, 8, 1)
+_reg100_kwds = _reg_kwds(8, 1, 0)
+_reg110_kwds = _reg_kwds(8, 8, 0)
+_bann_kwds = _reg000_kwds
+_ann_kwds = _reg110_kwds
+_snn_kwds = _reg001_kwds
+
 
 class TestNeuronBehavior:
     sim = SIM.MODE_DETERMINISTIC
@@ -93,6 +119,7 @@ def test_neuronal_charge(self, incoming_v, x, expected):
             self.sim,
             self.bt,
             keep_shape=True,
+            **_snn_kwds,
         )
         v_charged = n1._neuronal_charge(x, incoming_v)
 
@@ -140,6 +167,7 @@ def test_neuronal_leak(self, lim, ld, incoming_v, leak_v, expected):
             self.sim,
             self.bt,
             keep_shape=True,
+            **_snn_kwds,
         )
         v_leaked = n1._neuronal_leak(incoming_v)
 
@@ -172,6 +200,7 @@ def test_neuronal_fire(self, ntm, incoming_v, neg_thres, pos_thres, expected):
             self.sim,
             self.bt,
             keep_shape=True,
+            **_snn_kwds,
         )
         spike = n1._neuronal_fire(incoming_v)
 
@@ -208,6 +237,7 @@ def test_neuronal_reset(self, ntm, thr_mode, reset_mode, expected):
             self.sim,
             self.bt,
             keep_shape=True,
+            **_snn_kwds,
         )
 
         # Set the threshold mode manually
@@ -253,6 +283,7 @@ def test_vjt_overflow(self, incoming_v, expected_v, expected_spike):
             self.leak_v,
             self.sim,
             self.bt,
+            **_snn_kwds,
         )
 
         pb.FRONTEND_ENV["t"] += 1  # Only update when n1 starts working
@@ -288,13 +319,12 @@ def test_neuron_keep_shape():
     n2 = pb.TonicSpiking((4, 4), 5, keep_shape=False)
 
     assert n1.spike.shape == (16,)
+    assert n1.spike.shape == n1.output.shape
     assert n1.voltage.shape == (4, 4)
-    assert n1.output.shape == (256, 16)
     assert n1.feature_map.shape == (4, 4)
 
     assert n2.spike.shape == (16,)
     assert n2.voltage.shape == (16,)
-    assert n2.output.shape == (256, 16)
     assert n2.feature_map.shape == (16,)
 
 
@@ -364,11 +394,11 @@ def test_NeuronSubView_illegal(self, slice, expectation):
             n_subview = n[slice]
 
 
-class TestNeuron:
+class TestNeuronModeSNN:  # iss = 001
     def test_IF_hard_reset(self):
         n1 = pb.IF(1, 5, 2)
 
-        inp_data = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
+        incoming_v = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
         expected_spike = np.array(
             [[0], [0], [0], [1], [0], [1], [1], [0]], dtype=np.bool_
         )
@@ -376,9 +406,9 @@ def test_IF_hard_reset(self):
             [[2], [1], [4], [2], [3], [2], [2], [0]], dtype=np.int32
         )
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -386,7 +416,7 @@ def test_IF_hard_reset(self):
     def test_IF_soft_reset(self):
         n1 = pb.IF(1, 5, None)
 
-        inp_data = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
+        incoming_v = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
         expected_spike = np.array(
             [[0], [0], [0], [1], [1], [0], [1], [0]], dtype=np.bool_
         )
@@ -394,9 +424,9 @@ def test_IF_soft_reset(self):
             [[2], [1], [4], [4], [0], [2], [1], [-1]], dtype=np.int32
         )
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -405,7 +435,7 @@ def test_LIF_hard_reset(self):
         # hard reset + leak before comparison
         n1 = pb.LIF(shape=1, threshold=5, reset_v=2, leak_v=-1)
 
-        inp_data = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
+        incoming_v = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
         expected_spike = np.array(
             [[0], [0], [0], [1], [0], [0], [1], [0]], dtype=np.bool_
         )
@@ -413,9 +443,9 @@ def test_LIF_hard_reset(self):
             [[1], [-1], [1], [2], [2], [3], [2], [-1]], dtype=np.int32
         )
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -423,7 +453,7 @@ def test_LIF_hard_reset(self):
     def test_LIF_soft_reset(self):
         n1 = pb.LIF(1, 5, reset_v=None, leak_v=-1)
 
-        inp_data = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
+        incoming_v = np.array([2, -1, 3, 5, 1, 2, 4, -2], dtype=np.int8)
         expected_spike = np.array(
             [[0], [0], [0], [1], [0], [0], [0], [0]], dtype=np.bool_
         )
@@ -431,9 +461,9 @@ def test_LIF_soft_reset(self):
             [[1], [-1], [1], [0], [0], [1], [4], [1]], dtype=np.int32
         )
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -443,13 +473,13 @@ def test_LIF_with_bias(self):
         n1 = pb.LIF(shape=1, threshold=6, reset_v=1, leak_v=0, bias=2)
         assert n1.leak_v == n1.bias == 2
 
-        inp_data = np.array([1, 1, 0, 1, 0, 1], dtype=np.bool_)
+        incoming_v = np.array([1, 1, 0, 1, 0, 1], dtype=np.bool_)
         expected_spike = np.array([[0], [1], [0], [1], [0], [1]], dtype=np.bool_)
         expected_vol = np.array([[3], [1], [3], [1], [3], [1]], dtype=np.int32)
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -459,13 +489,13 @@ def test_LIF_both_leak_bias(self):
         n1 = pb.LIF(shape=1, threshold=6, leak_v=-1, bias=2)
         assert n1.leak_v == n1.bias == 1
 
-        inp_data = np.array([1, 1, 0, 1, 0, 1], dtype=np.bool_)
+        incoming_v = np.array([1, 1, 0, 1, 0, 1], dtype=np.bool_)
         expected_spike = np.array([[0], [0], [0], [1], [0], [0]], dtype=np.bool_)
         expected_vol = np.array([[2], [4], [5], [1], [2], [4]], dtype=np.int32)
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -473,7 +503,7 @@ def test_LIF_both_leak_bias(self):
     def test_TonicSpiking(self):
         n1 = pb.TonicSpiking(1, fire_step=3)
 
-        inp_data = np.array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1], dtype=np.bool_)
+        incoming_v = np.array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1], dtype=np.bool_)
         expected_spike = np.array(
             [[0], [0], [1], [0], [0], [0], [0], [1], [0], [0]], dtype=np.bool_
         )
@@ -481,9 +511,9 @@ def test_TonicSpiking(self):
             [[1], [2], [0], [1], [1], [2], [2], [0], [0], [1]], dtype=np.int32
         )
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -491,7 +521,7 @@ def test_TonicSpiking(self):
     def test_PhasicSpiking(self):
         n1 = pb.PhasicSpiking(1, fire_step=3, neg_floor=-2)
 
-        inp_data = np.array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1], dtype=np.bool_)
+        incoming_v = np.array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1], dtype=np.bool_)
         expected_spike = np.array(
             [[0], [0], [1], [0], [0], [0], [0], [0], [0], [0]], dtype=np.bool_
         )
@@ -499,9 +529,9 @@ def test_PhasicSpiking(self):
             [[2], [4], [-3], [-2], [-2], [-2], [-2], [-2], [-2], [-2]], dtype=np.int32
         )
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
@@ -509,13 +539,13 @@ def test_PhasicSpiking(self):
     def test_SpikingRelu(self):
         n1 = pb.SpikingRelu(1)
 
-        inp_data = np.random.randint(0, 2, size=(20, 1), dtype=np.bool_)
+        incoming_v = np.random.randint(0, 2, size=(20, 1), dtype=np.bool_)
 
-        for i in range(inp_data.size):
+        for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(inp_data[i])
+            n1.update(incoming_v[i])
 
-            assert np.array_equal(n1.spike, inp_data[i])
+            assert np.array_equal(n1.spike, incoming_v[i])
 
     def test_sum_inputs_behavior(self, build_Net2):
         net = build_Net2
@@ -584,11 +614,98 @@ def test_AvgPool_Neuron(self, n_window):
         n1 = Neuron(shape=(1,), leak_v=1 - typical_round(n_window / 2), neg_threshold=0)
 
         # Generate upper triangular matrix where the number of 1's increases in sequence.
-        inp_data = np.tril(np.ones((1 + n_window, n_window), dtype=np.bool_))
+        incoming_v = np.tril(np.ones((1 + n_window, n_window), dtype=np.bool_))
 
         for i in range(1 + n_window):
             pb.FRONTEND_ENV["t"] += 1
-            n1.update(np.sum(inp_data[i]))
+            n1.update(np.sum(incoming_v[i]))
 
             expected = (i + 1) >= typical_round(n_window / 2)
             assert np.array_equal(n1.spike[0], expected)
+
+
+class TestNeuronAllModes:
+    """Test neuron with specified 'spike width' & 'snn_en'.
+
+    NOTE: '001' is SNN mode which is tested in the previous cases.
+    """
+
+    @staticmethod
+    def _ann_vjt_func(vj: VoltageType, neuron: Neuron) -> NDArray[NEUOUT_U8_DTYPE]:
+        def _bit_tuncate(bit_tunc: int, vj: VoltageType):
+            if bit_tunc == 0:
+                return np.zeros_like(vj)
+            elif vj >> bit_tunc > 0:  # Saturate truncation
+                return np.full_like(vj, 255)
+            elif bit_tunc < 8:
+                return (vj << (8 - bit_tunc)) & 255
+            else:
+                return (vj >> (bit_tunc - 8)) & 255
+
+        return np.where(
+            vj >= neuron.pos_threshold,
+            _bit_tuncate(neuron.bit_truncation, vj),
+            neuron._vjt0,
+        ).astype(NEUOUT_U8_DTYPE)
+
+    @pytest.mark.parametrize("reg_kwds", [_reg010_kwds, _reg110_kwds])
+    def test_IF_ss10(self, reg_kwds):
+        n1 = pb.IF(1, 0, 0, bit_truncation=8, **reg_kwds)
+
+        incoming_v = np.random.randint(
+            np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(8,), dtype=np.int32
+        )
+
+        for i in range(incoming_v.size):
+            pb.FRONTEND_ENV["t"] += 1
+            n1.update(incoming_v[i])
+            v_bt = self._ann_vjt_func(
+                np.asarray(incoming_v[i], dtype=np.int32),
+                n1,
+            )
+
+            assert np.array_equal(n1.spike, v_bt)
+
+    def test_LIF_ss11(self):
+        pos_thres = 8000
+        n1 = pb.LIF(1, pos_thres, bit_truncation=12, **_reg011_kwds)
+
+        incoming_v = np.random.randint(-10000, 10000, size=(20,), dtype=np.int32)
+        pre_vjt = 0
+
+        for i in range(incoming_v.size):
+            pb.FRONTEND_ENV["t"] += 1
+            n1.update(incoming_v[i])
+
+            pre_vjt += incoming_v[i]
+            spike = pre_vjt >= pos_thres
+
+            v_bt = self._ann_vjt_func(
+                np.asarray(pre_vjt, dtype=np.int32),
+                n1,
+            )
+
+            if spike:
+                pre_vjt -= pos_thres
+
+            assert np.array_equal(n1.spike, v_bt)
+
+    @pytest.mark.parametrize("reg_kwds", [_reg000_kwds, _reg100_kwds])
+    def test_LIF_ss00(self, reg_kwds):
+        pos_thres = 8000
+        n1 = pb.LIF(1, pos_thres, reset_v=2000, bit_truncation=10, **reg_kwds)
+
+        incoming_v = np.random.randint(-10000, 10000, size=(20,), dtype=np.int32)
+        pre_vjt = 0
+
+        for i in range(incoming_v.size):
+            pb.FRONTEND_ENV["t"] += 1
+            n1.update(incoming_v[i])
+
+            pre_vjt = incoming_v[i]
+            spike = pre_vjt >= pos_thres
+
+            if spike:
+                pre_vjt = 2000
+
+            assert np.array_equal(n1.spike[0], spike)

From ec5f1aee12974df4d1ace3a69805d1d4758966d4 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 5 Jul 2024 14:32:56 +0800
Subject: [PATCH 004/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20the=20d?=
 =?UTF-8?q?type=20of=20output=20of=20neuron.=20Use=20NeuOutType=20instead?=
 =?UTF-8?q?=20of=20SpikeType=20&=20u8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py         | 29 +++---------
 paibox/components/synapses/base.py       | 19 ++++----
 paibox/components/synapses/conv_utils.py | 41 +++++++++-------
 paibox/components/synapses/transforms.py | 60 +++++++++++++-----------
 paibox/mixin.py                          |  4 +-
 paibox/types.py                          |  3 +-
 6 files changed, 75 insertions(+), 81 deletions(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 51899572..3fa0576f 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -1,15 +1,8 @@
-import sys
 import warnings
 from collections.abc import Iterable
 from typing import Any, Literal, NoReturn, Optional, Union
 
-if sys.version_info >= (3, 10):
-    from typing import TypeAlias
-else:
-    from typing_extensions import TypeAlias
-
 import numpy as np
-from numpy.typing import NDArray
 from paicorelib import (
     LCM,
     LDM,
@@ -30,12 +23,10 @@
 from paibox.exceptions import PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
-    SPIKE_DTYPE,
     VOLTAGE_DTYPE,
     LeakVType,
     NeuOutType,
     Shape,
-    SpikeType,
     VoltageType,
 )
 from paibox.utils import (
@@ -54,13 +45,11 @@
     vjt_overflow,
     _input_width_format,
     _spike_width_format,
-    _get_neu_out_dtype,
 )
 
 __all__ = ["Neuron"]
 
 L = Literal
-NeuOutTruncType: TypeAlias = NDArray[NEUOUT_U8_DTYPE]
 
 
 class MetaNeuron:
@@ -218,7 +207,7 @@ def _neuronal_leak(self, vjt: VoltageType) -> VoltageType:
 
         return vjt_overflow(v_leaked, self.overflow_strict)
 
-    def _neuronal_fire(self, vjt: VoltageType) -> SpikeType:
+    def _neuronal_fire(self, vjt: VoltageType) -> NeuOutType:
         r"""3. Threshold comparison.
 
         3.1 Random threshold.
@@ -244,7 +233,7 @@ def _neuronal_fire(self, vjt: VoltageType) -> SpikeType:
         )
 
         spike = self.thres_mode == TM.EXCEED_POSITIVE
-        return spike
+        return spike.astype(NEUOUT_U8_DTYPE)
 
     def _neuronal_reset(self, vjt: VoltageType) -> VoltageType:
         r"""4. Reset.
@@ -300,7 +289,7 @@ def _when_exceed_neg() -> VoltageType:
 
         return v_reset.astype(VOLTAGE_DTYPE)
 
-    def _bit_truncate(self, vj: VoltageType) -> NeuOutTruncType:
+    def _bit_truncate(self, vj: VoltageType) -> NeuOutType:
         r"""Bit Truncation.
 
         If spiking width format is `WIDTH_1BIT`, then
@@ -355,7 +344,7 @@ def _aux_post_hook(self) -> None:
 
     def update(
         self, incoming_v: VoltageType, vjt_pre: VoltageType
-    ) -> tuple[Union[SpikeType, NeuOutTruncType], VoltageType]:
+    ) -> tuple[NeuOutType, VoltageType]:
         """Update at one timestep."""
         self._aux_pre_hook()
 
@@ -389,18 +378,13 @@ def update(
     def init_param(self, param: Any) -> np.ndarray:
         return np.full((self._n_neuron,), param)
 
-    @property
-    def _neu_out_dtype(self) -> type[Union[SPIKE_DTYPE, NEUOUT_U8_DTYPE]]:
-        """dtype of output of neuron."""
-        return _get_neu_out_dtype(self.spike_width)
-
     @property
     def _vjt0(self) -> VoltageType:
         return self.init_param(0).astype(VOLTAGE_DTYPE)
 
     @property
     def _neu_out0(self) -> NeuOutType:
-        return self.init_param(0).astype(self._neu_out_dtype)
+        return self.init_param(0).astype(NEUOUT_U8_DTYPE)
 
     @property
     def varshape(self) -> tuple[int, ...]:
@@ -480,8 +464,7 @@ def __init__(
         self.set_memory(
             "delay_registers",
             np.zeros(
-                (HwConfig.N_TIMESLOT_MAX,) + self._neu_out.shape,
-                dtype=self._neu_out.dtype,
+                (HwConfig.N_TIMESLOT_MAX,) + self._neu_out.shape, dtype=NEUOUT_U8_DTYPE
             ),
         )
 
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index e9e404a3..4163cdca 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -6,7 +6,7 @@
 
 from paibox.base import NeuDyn, SynSys
 from paibox.exceptions import RegisterError, ShapeError
-from paibox.types import DataArrayType, SynOutType, WeightType
+from paibox.types import DataArrayType, NeuOutType, SynOutType, WeightType
 
 from ..modules import BuildingModule
 from ..neuron import Neuron
@@ -50,7 +50,6 @@ def __init__(
         name: Optional[str] = None,
     ) -> None:
         super().__init__(name)
-
         self._source = source
         self._target = target
 
@@ -66,19 +65,21 @@ def __init__(
     def __call__(self, *args, **kwargs) -> SynOutType:
         return self.update(*args, **kwargs)
 
-    def update(self, spike: Optional[np.ndarray] = None, *args, **kwargs) -> SynOutType:
-        # Retrieve the spike at index `timestamp` of the dest neurons
+    def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
+        # Retrieve the output at [timestamp] of the dest neurons
         if self.dest.is_working():
             if isinstance(self.source, InputProj):
-                synin = self.source.output.copy() if spike is None else spike
+                synin = self.source.output if x is None else np.atleast_1d(x)
             else:
                 idx = self.dest.timestamp % HwConfig.N_TIMESLOT_MAX
-                synin = self.source.output[idx].copy() if spike is None else spike
+                synin = (
+                    self.source.delay_registers[idx] if x is None else np.atleast_1d(x)
+                )
         else:
             # Retrieve 0 to the dest neurons if it is not working
-            synin = np.zeros_like(self.source.spike)
+            synin = np.zeros_like(self.source.output)
 
-        self._synout = self.comm(synin).ravel().astype(np.int32)
+        self._synout = self.comm(synin).ravel()
         return self._synout
 
     def reset_state(self, *args, **kwargs) -> None:
@@ -88,7 +89,7 @@ def reset_state(self, *args, **kwargs) -> None:
     def __copy__(self) -> "FullConnSyn":
         return self.__deepcopy__()
 
-    def __deepcopy__(self, memo=None, _nil=[]) -> "FullConnSyn":
+    def __deepcopy__(self) -> "FullConnSyn":
         self._n_copied += 1
 
         return FullConnSyn(
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 05d04b2e..a7235084 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -1,13 +1,18 @@
 from collections.abc import Iterable
 from functools import partial
 from itertools import repeat
-from typing import Any
 
 import numpy as np
 from numpy.typing import NDArray
 
 from paibox.exceptions import ShapeError
-from paibox.types import SpikeType, SynOutType, WeightType
+from paibox.types import (
+    NEUOUT_U8_DTYPE,
+    VOLTAGE_DTYPE,
+    NeuOutType,
+    SynOutType,
+    WeightType,
+)
 
 from .conv_types import Size1Type, Size2Type, Size3Type, SizeAnyType, _Order2d, _Order3d
 
@@ -232,14 +237,14 @@ def _pool2d_kernel_unroll(
 
 
 def _func_pool2d(
-    x_chw: SpikeType,
+    x_chw: NeuOutType,
     out_shape: Size2Type,
     ksize: Size2Type,
     stride: Size2Type,
     padding: Size2Type,
     type: str,
     threshold: int,
-) -> SpikeType:
+) -> NeuOutType:
     xcin, xh, xw = x_chw.shape
     kh, kw = ksize
     oh, ow = out_shape
@@ -276,13 +281,15 @@ def _func_pool2d(
                     )
 
     if type == "avg":
-        return out >= threshold
+        result = out >= threshold
     else:
-        return out.astype(np.bool_)
+        result = out
+
+    return result.astype(NEUOUT_U8_DTYPE)
 
 
 def _conv1d_faster(
-    x_cl: NDArray[Any],
+    x_cl: NeuOutType,
     out_shape: Size1Type,
     kernel: WeightType,
     stride: Size1Type,
@@ -309,11 +316,11 @@ def _conv1d_faster(
     out = col_fm @ col_kernel.T  # + self.bias
 
     # (ol, cout) -> (cout, ol)
-    return out.astype(np.int32).T
+    return out.T.astype(VOLTAGE_DTYPE)
 
 
 def _conv2d_faster(
-    x_chw: NDArray[Any],
+    x_chw: NeuOutType,
     out_shape: Size2Type,
     kernel: WeightType,
     stride: Size2Type,
@@ -343,9 +350,9 @@ def _conv2d_faster(
     # (oh*ow, cin*kh*kw) * (cout, cin*kh*kw)^T = (oh*ow, cout)
     out = col_fm @ col_kernel.T  # + self.bias
     # (oh*ow, cout) -> (cout, oh*ow) -> (cout, oh, ow)
-    out = out.astype(np.int32).T.reshape((cout,) + out_shape)
+    out = out.T.reshape((cout,) + out_shape)
 
-    return out
+    return out.astype(VOLTAGE_DTYPE)
 
 
 def _convtranspose1d_unroll(
@@ -516,7 +523,7 @@ def _convtranspose2d_unroll(
 
 
 def _convtranspose1d_faster(
-    x_cl: NDArray[Any],
+    x_cl: NeuOutType,
     out_shape: Size1Type,
     kernel: WeightType,
     stride: Size1Type,
@@ -565,11 +572,11 @@ def _convtranspose1d_faster(
     # output_padding
     out = np.pad(out, ((0, 0), (0, output_padding[0])), mode="constant")
 
-    return out.astype(np.int32)
+    return out.astype(VOLTAGE_DTYPE)
 
 
 def _convtranspose2d_faster(
-    x_chw: NDArray[Any],
+    x_chw: NeuOutType,
     out_shape: Size2Type,
     kernel: WeightType,
     stride: Size2Type,
@@ -615,7 +622,7 @@ def _convtranspose2d_faster(
     # (oh*ow, cin*kh*kw) * (cin*kh*kw, cout) = (oh*ow, cout)
     out_col = col_fm @ kernel_col.T
     # (oh*ow, cout) -> (oh, ow, cout) -> (cout, oh, ow)
-    out = out_col.astype(np.int32).T.reshape((cout,) + (noh, now))
+    out = out_col.astype(VOLTAGE_DTYPE).T.reshape((cout,) + (noh, now))
 
     # padding & output_padding
     # inverse padding
@@ -633,7 +640,7 @@ def _convtranspose2d_faster(
 
 
 def _1d_im2col(
-    x_padded: NDArray[Any], ol: int, kl: int, stride: Size1Type
+    x_padded: NeuOutType, ol: int, kl: int, stride: Size1Type
 ) -> NDArray[np.int64]:
     cols = np.zeros((ol, x_padded.shape[0] * kl), dtype=np.int64)
 
@@ -648,7 +655,7 @@ def _1d_im2col(
 
 
 def _2d_im2col(
-    x_padded: NDArray[Any], oh: int, ow: int, kh: int, kw: int, stride: Size2Type
+    x_padded: NeuOutType, oh: int, ow: int, kh: int, kw: int, stride: Size2Type
 ) -> NDArray[np.int64]:
     cols = np.zeros((oh * ow, x_padded.shape[0] * kh * kw), dtype=np.int64)
 
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 845d6025..64a6b8ce 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -6,7 +6,14 @@
 from paicorelib import WeightPrecision as WP
 
 from paibox.exceptions import AutoOptimizationWarning, ShapeError
-from paibox.types import DataArrayType, IntScalarType, SpikeType, SynOutType, WeightType
+from paibox.types import (
+    DataArrayType,
+    IntScalarType,
+    NeuOutType,
+    SynOutType,
+    WeightType,
+    VOLTAGE_DTYPE,
+)
 from paibox.utils import is_shape, shape2num, typical_round
 
 from .conv_types import Size1Type, Size2Type, SizeAnyType
@@ -92,14 +99,6 @@ def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
         raise ValueError(f"weight out of range int8, got [{_min}, {_max}].")
 
     if _array.dtype > np.int8:
-        # XXX If it is automatically optimized to int8, it cannot be converted using the 'same_kind' rule.
-        # if _max <= MAX_INT1 and _min >= MIN_INT1:
-        #     warnings.warn(
-        #         f"dtype of weight is optimized automatically, {_array.dtype} -> bool.",
-        #         AutoOptimizationWarning,
-        #     )
-        #     _dtype = np.bool_
-        # else:
         warnings.warn(
             f"dtype of weight is optimized automatically, {_array.dtype} -> int8.",
             AutoOptimizationWarning,
@@ -139,13 +138,15 @@ def _get_weight_precision(weight: WeightType, enable_wp_opt: bool) -> WP:
 class Transform:
     def __init__(self, weights: DataArrayType) -> None:
         self.weights = _set_coarse_dtype(weights)
-
         """The actual weights in synapses. Stored in `np.bool_` or `np.int8` format."""
+
         self.weights.setflags(write=False)
 
     def __call__(self, *args, **kwargs) -> SynOutType:
-        """Ensure that in all subclasses, the output dimensions are (M,)."""
-        raise NotImplementedError
+        # Ensure that in all subclasses, the output dimensions are (M,).
+        raise NotImplementedError(
+            "function '__call__' must be implemented in the subclasses."
+        )
 
     def _get_wp(self, enable_wp_opt: bool) -> WP:
         return _get_weight_precision(self.weights, enable_wp_opt)
@@ -153,7 +154,9 @@ def _get_wp(self, enable_wp_opt: bool) -> WP:
     @property
     def connectivity(self) -> WeightType:
         """The connectivity matrix in `np.ndarray` format."""
-        raise NotImplementedError
+        raise NotImplementedError(
+            "property 'connectivity' must be implemented in the subclasses."
+        )
 
 
 class OneToOne(Transform):
@@ -185,9 +188,9 @@ def __init__(self, num: int, weights: DataArrayType) -> None:
                 f"the ndim of weights must be 0 or 1, but got {self.weights.ndim}."
             )
 
-    def __call__(self, x: SpikeType, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         # (N,) * (N,) -> (N,)
-        return x * self.weights.astype(np.int32)
+        return x * self.weights.astype(VOLTAGE_DTYPE)
 
     @property
     def connectivity(self):
@@ -233,20 +236,21 @@ def __init__(self, conn_size: Size2Type, weights: DataArrayType) -> None:
                 f"the ndim of weights must be 0 or 2, but got {self.weights.ndim}."
             )
 
-    def __call__(self, x: SpikeType, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         """
         NOTE:
-            - When weights is a scalar, the output is a scalar (sum * w) & repeated     \
-                `conn_size[1]` times.
+            - When weights is a scalar, the output is a scalar (sum * w) & repeated `conn_size[1]` times.
             - When weights is a matrix, the output is the dot product of `x` & weights.
         """
         if self.weights.ndim == 0:
-            sum_x = np.sum(x, axis=None, dtype=np.int32)
+            sum_x = np.sum(x, dtype=VOLTAGE_DTYPE)
             # (M,)
-            output = np.full((self.conn_size[1],), self.weights * sum_x, dtype=np.int32)
+            output = np.full(
+                (self.conn_size[1],), self.weights * sum_x, dtype=VOLTAGE_DTYPE
+            )
         else:
             # (N,) @ (N, M) -> (M,)
-            output = x @ self.weights.astype(np.int32)
+            output = x @ self.weights.astype(VOLTAGE_DTYPE)
 
         return output
 
@@ -284,11 +288,11 @@ def __init__(
 
         super().__init__(weights)
 
-    def __call__(self, x: SpikeType, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         # (n?, k) @ (k, m?) -> (n?, m?)
         _x = x.reshape(self.in_shape).transpose(self.axes)
 
-        return _x @ self.weights.astype(np.int32)
+        return _x @ self.weights.astype(VOLTAGE_DTYPE)
 
     @staticmethod
     def _matmul_unroll(
@@ -343,7 +347,7 @@ def __init__(
 
         super().__init__(kernel)
 
-    def __call__(self, x: SpikeType, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
 
         # if self.fm_order == "LC":
@@ -381,7 +385,7 @@ def __init__(
 
         super().__init__(kernel)
 
-    def __call__(self, x: SpikeType, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
 
         # if self.fm_order == "HWC":
@@ -421,7 +425,7 @@ def __init__(
 
         super().__init__(kernel)
 
-    def __call__(self, x: np.ndarray, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
 
         # if self.fm_order == "LC":
@@ -471,7 +475,7 @@ def __init__(
 
         super().__init__(kernel)
 
-    def __call__(self, x: np.ndarray, *args, **kwargs) -> SynOutType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
 
         # if self.fm_order == "HWC":
@@ -529,7 +533,7 @@ def __init__(
 
         super().__init__(1)
 
-    def __call__(self, x: SpikeType, *args, **kwargs) -> SpikeType:
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> NeuOutType:
         # if self.fm_order == "HWC":
         #     # (N,) -> (H, W, C) -> (C, H, W)
         #     _x = x.reshape(self.in_shape + (self.channels,)).transpose(2, 0, 1)
diff --git a/paibox/mixin.py b/paibox/mixin.py
index d96f921e..f16af011 100644
--- a/paibox/mixin.py
+++ b/paibox/mixin.py
@@ -144,13 +144,13 @@ def register_master(
     def unregister_master(self, key: str) -> Optional["FullConnectedSyn"]:
         return self.master_nodes.pop(key, None)
 
-    def get_master_node(self, key: str) -> Optional[Any]:
+    def get_master_node(self, key: str) -> Optional["FullConnectedSyn"]:
         return self.master_nodes.get(key, None)
 
     def sum_inputs(self, *args, **kwargs) -> VoltageType:
         output = 0
         for node in self.master_nodes.values():
-            output += node.output.copy()
+            output = output + node.output.copy()  # do not use +=
 
         return np.asarray(output, dtype=VOLTAGE_DTYPE)
 
diff --git a/paibox/types.py b/paibox/types.py
index 82020805..4392684e 100644
--- a/paibox/types.py
+++ b/paibox/types.py
@@ -23,11 +23,10 @@
 VOLTAGE_DTYPE = np.int32
 NEUOUT_SPIKE_DTYPE = np.bool_
 NEUOUT_U8_DTYPE = np.uint8
-NEUOUT_DTYPE = Union[NEUOUT_SPIKE_DTYPE, NEUOUT_U8_DTYPE]
 
 LeakVType: TypeAlias = NDArray[LEAK_V_DTYPE]
 SpikeType: TypeAlias = NDArray[SPIKE_DTYPE]
 SynOutType: TypeAlias = NDArray[VOLTAGE_DTYPE]
 VoltageType: TypeAlias = NDArray[VOLTAGE_DTYPE]
-NeuOutType: TypeAlias = NDArray[NEUOUT_DTYPE]
+NeuOutType: TypeAlias = NDArray[NEUOUT_U8_DTYPE]
 WeightType: TypeAlias = NDArray[Union[np.bool_, np.int8]]

From 1c721d67620f8c4b20fa51bfccd1fb9bcc2c0cdd Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 5 Jul 2024 14:33:38 +0800
Subject: [PATCH 005/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20the=20u?=
 =?UTF-8?q?pdate=20types=20of=20projection=20&=20simplify=20output=20handl?=
 =?UTF-8?q?ing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/projection.py | 90 +++++++++++++++------------------
 1 file changed, 41 insertions(+), 49 deletions(-)

diff --git a/paibox/components/projection.py b/paibox/components/projection.py
index 73e25ad7..0872e501 100644
--- a/paibox/components/projection.py
+++ b/paibox/components/projection.py
@@ -1,7 +1,7 @@
 import inspect
 import sys
 from collections.abc import Callable
-from typing import Optional, Union
+from typing import Literal, Optional, Union
 
 import numpy as np
 
@@ -14,11 +14,12 @@
 from paibox.context import _FRONTEND_CONTEXT
 from paibox.exceptions import ShapeError, SimulationError
 from paibox.mixin import TimeRelatedNode
-from paibox.types import DataType, Shape, SpikeType
+from paibox.types import NEUOUT_U8_DTYPE, DataType, NeuOutType, Shape
 from paibox.utils import as_shape, shape2num
 
 __all__ = ["InputProj"]
 
+L = Literal
 P = ParamSpec("P")
 
 
@@ -26,12 +27,27 @@ def _func_bypass(x: DataType) -> DataType:
     return x
 
 
-class Projection(DynamicSys):
-    def __call__(self, *args, **kwargs) -> SpikeType:
+class Projection(DynamicSys, TimeRelatedNode):
+    def __call__(self, *args, **kwargs) -> NeuOutType:
         return self.update(*args, **kwargs)
 
+    @property
+    def delay_relative(self) -> int:
+        return 1  # Fixed
+
+    @property
+    def tick_wait_start(self) -> int:
+        return 1  # Fixed
+
+    @property
+    def tick_wait_end(self) -> int:
+        return 0  # Fixed
+
+
+class InputProj(Projection):
+    # TODO Since the input port can be equivalent to the output of a neuron, is it more appropriate
+    # to use a neuron as an input port?
 
-class InputProj(Projection, TimeRelatedNode):
     def __init__(
         self,
         input: Optional[Union[DataType, Callable[P, DataType]]],
@@ -43,9 +59,8 @@ def __init__(
         """The input node of network.
 
         Arguments:
-            - input: the input value of the projection node. It can be numeric value or callable\
-                function(function or `Encoder`).
-            - shape_out: the shape of the output.
+            - input: the input value of the projection node. It can be a numeric value or a callable function.
+            - shape_out: the shape of the output.
             - keep_shape: wether to keep the shape when retieving the feature map.
             - name: the name of the node. Optional.
         """
@@ -59,42 +74,31 @@ def __init__(
             self._func_input = input
         else:  # Numeric input
             self._num_input = input
-            self._func_input = _func_bypass
+            self._func_input = None
 
         self._shape = as_shape(shape_out)
         self.keep_shape = keep_shape
+        self.set_memory("_neu_out", np.zeros((self.num_out,), dtype=NEUOUT_U8_DTYPE))
 
-        self.set_memory("_inner_spike", np.zeros((self.num_out,), dtype=np.bool_))
+    def update(self, *args, **kwargs) -> NeuOutType:
+        _input = self._get_neumeric_input(**kwargs)
 
-    def update(self, **kwargs) -> SpikeType:
-        _spike = self._get_neumeric_input(**kwargs)
-
-        if isinstance(_spike, (int, np.bool_, np.integer)):
-            # XXX In order to simplify the situation where one neuron is connected to
-            # multiple axons in the simulation (the actual input node output size is 8),
-            # one input node is temporarily allowed to output 8 bits of data.
-            if isinstance(_spike, (np.bool_, np.integer)):
-                _dtype = _spike.dtype
-            else:
-                _dtype = np.int8
-
-            self._inner_spike = np.full((self.num_out,), _spike, dtype=_dtype)
-
-        elif isinstance(_spike, np.ndarray):
-            if shape2num(_spike.shape) != self.num_out:
+        if isinstance(_input, (int, np.bool_, np.integer)):
+            self._neu_out = np.full_like(self._neu_out, _input, dtype=NEUOUT_U8_DTYPE)
+        elif isinstance(_input, np.ndarray):
+            if _input.size != self._neu_out.size:
                 raise ShapeError(
-                    f"cannot reshape output value from {_spike.shape} to ({self.num_out},)."
+                    f"cannot reshape output value from {_input.shape} to {self._neu_out.shape}."
                 )
-            self._inner_spike = _spike.ravel()
-
+            self._neu_out = _input.ravel().astype(NEUOUT_U8_DTYPE)
         else:
             # should never be reached
             raise TypeError(
                 f"expected type int, np.bool_, np.integer or np.ndarray, "
-                f"but got {_spike}, type {type(_spike)}."
+                f"but got {_input}, type {type(_input)}."
             )
 
-        return self._inner_spike
+        return self._neu_out
 
     def reset_state(self) -> None:
         self.reset_memory()  # Call reset of `StatusMemory`.
@@ -149,28 +153,16 @@ def input(self, value: DataType) -> None:
         self._num_input = value
 
     @property
-    def output(self) -> SpikeType:
-        return self._inner_spike
-
-    @property
-    def spike(self) -> SpikeType:
-        return self._inner_spike
+    def output(self) -> NeuOutType:
+        return self._neu_out
 
     @property
-    def feature_map(self) -> SpikeType:
-        return self.output.reshape(self.varshape)
+    def spike(self) -> NeuOutType:
+        return self._neu_out
 
     @property
-    def delay_relative(self) -> int:
-        return 1  # Fixed
-
-    @property
-    def tick_wait_start(self) -> int:
-        return 1  # Fixed
-
-    @property
-    def tick_wait_end(self) -> int:
-        return 0  # Fixed
+    def feature_map(self) -> NeuOutType:
+        return self._neu_out.reshape(self.varshape)
 
 
 def _call_with_ctx(f: Callable[..., DataType], *args, **kwargs) -> DataType:

From f604bf3c372693ac42f1955ae63b0b8d83266220 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 5 Jul 2024 14:34:21 +0800
Subject: [PATCH 006/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20the=20u?=
 =?UTF-8?q?pdate=20types=20of=20fmodules=20&=20simplify=20output=20handlin?=
 =?UTF-8?q?g?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/functional.py | 80 ++++++++++++++++++-------------
 paibox/components/modules.py    | 84 +++++++++++++++++++++++----------
 2 files changed, 104 insertions(+), 60 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 31df789c..764ce2c5 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -4,13 +4,20 @@
 from typing import Literal, Optional, Union
 
 import numpy as np
-from numpy.typing import NDArray
 from paicorelib import NTM, RM, TM
 
 from paibox.base import NeuDyn, NodeList
 from paibox.exceptions import PAIBoxDeprecationWarning, ShapeError
 from paibox.network import DynSysGroup
-from paibox.types import IntScalarType, SpikeType, VoltageType
+from paibox.types import (
+    NEUOUT_U8_DTYPE,
+    VOLTAGE_DTYPE,
+    IntScalarType,
+    NeuOutType,
+    SpikeType,
+    VoltageType,
+    WeightType,
+)
 from paibox.utils import (
     arg_check_non_neg,
     arg_check_pos,
@@ -26,6 +33,7 @@
     FunctionalModule2to1WithV,
     FunctionalModuleWithV,
     TransposeModule,
+    set_rt_mode,
 )
 from .neuron import Neuron
 from .neuron.neurons import *
@@ -57,6 +65,7 @@
 ]
 
 
+@set_rt_mode(1, 1, 1)
 class BitwiseAND(FunctionalModule2to1):
     inherent_delay = 0
 
@@ -89,7 +98,7 @@ def __init__(
         """
         super().__init__(neuron_a, neuron_b, keep_shape=keep_shape, name=name, **kwargs)
 
-    def spike_func(self, x1: SpikeType, x2: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 & x2
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
@@ -126,6 +135,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(1, 1, 1)
 class BitwiseNOT(FunctionalModule):
     inherent_delay = 0
 
@@ -157,8 +167,8 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: SpikeType, **kwargs) -> SpikeType:
-        return ~x1
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        return (x1 == 0).astype(NEUOUT_U8_DTYPE)  # logical NOT of a uint8 array, kept in uint8
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         n1_not = LIF(
@@ -187,6 +197,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(1, 1, 1)
 class BitwiseOR(FunctionalModule2to1):
     inherent_delay = 0
 
@@ -209,7 +220,7 @@ def __init__(
         """
         super().__init__(neuron_a, neuron_b, keep_shape=keep_shape, name=name, **kwargs)
 
-    def spike_func(self, x1: SpikeType, x2: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 | x2
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
@@ -243,6 +254,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(1, 1, 1)
 class BitwiseXOR(FunctionalModule2to1):
     inherent_delay = 1
 
@@ -266,7 +278,7 @@ def __init__(
         """
         super().__init__(neuron_a, neuron_b, keep_shape=keep_shape, name=name, **kwargs)
 
-    def spike_func(self, x1: SpikeType, x2: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 ^ x2
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
@@ -364,7 +376,7 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return x1
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
@@ -417,6 +429,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(1, 1, 1)
 class SpikingAdd(FunctionalModule2to1WithV):
     inherent_delay = 0
 
@@ -457,12 +470,12 @@ def __init__(
 
         super().__init__(neuron_a, neuron_b, keep_shape=keep_shape, name=name, **kwargs)
 
-    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[SpikeType, VoltageType]:
+    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageType]:
         """Simplified neuron computing mechanism as the operator function."""
         return _spike_func_sadd_ssub(vjt, self.pos_threshold, self.reset_v)
 
     def synaptic_integr(
-        self, x1: SpikeType, x2: SpikeType, vjt_pre: VoltageType
+        self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
     ) -> VoltageType:
         return _sum_inputs_sadd_ssub(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
@@ -501,6 +514,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(1, 1, 1)
 class _SpikingPool2dWithV(FunctionalModuleWithV):
     inherent_delay = 0
 
@@ -547,11 +561,11 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[SpikeType, VoltageType]:
+    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageType]:
         return _spike_func_avg_pool(vjt, self.pos_thres)
 
-    def synaptic_integr(self, x1: SpikeType, vjt_pre: VoltageType) -> VoltageType:
-        return vjt_overflow((vjt_pre + self.tfm(x1).ravel()).astype(np.int32))
+    def synaptic_integr(self, x1: NeuOutType, vjt_pre: VoltageType) -> VoltageType:
+        return vjt_overflow(vjt_pre + self.tfm(x1).ravel())
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         n1_ap2d = IF(
@@ -579,6 +593,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(1, 1, 1)
 class _SpikingPool2d(FunctionalModule):
     inherent_delay = 0
 
@@ -629,7 +644,7 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return self.tfm(x1)
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
@@ -664,12 +679,6 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         generated = [n1_p2d, syn1]
         self._rebuild_out_intf(network, n1_p2d, *generated, **build_options)
 
-        # for syns in self.module_intf.output:
-        #     syns.source = n1_p2d
-
-        # network._add_components(*generated)
-        # network._remove_components(self)
-
         return generated
 
 
@@ -774,6 +783,7 @@ def __init__(
         )
 
 
+@set_rt_mode(1, 1, 1)
 class SpikingSub(FunctionalModule2to1WithV):
     inherent_delay = 0
     factor_a: int = 1
@@ -803,12 +813,12 @@ def __init__(
         self.overflow_strict = overflow_strict
         super().__init__(neuron_a, neuron_b, keep_shape=keep_shape, name=name, **kwargs)
 
-    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[SpikeType, VoltageType]:
+    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageType]:
         """Simplified neuron computing mechanism to generate output spike."""
         return _spike_func_sadd_ssub(vjt, self.pos_threshold)
 
     def synaptic_integr(
-        self, x1: SpikeType, x2: SpikeType, vjt_pre: VoltageType
+        self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
     ) -> VoltageType:
         return _sum_inputs_sadd_ssub(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
@@ -852,6 +862,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
     "'Transpose2d' will be removed in a future version. Use 'MatMul2d' instead.",
     category=PAIBoxDeprecationWarning,
 )
+@set_rt_mode(1, 1, 1)
 class Transpose2d(TransposeModule):
     def __init__(
         self,
@@ -877,7 +888,7 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         _x1 = x1.reshape(self.shape_in)
 
         return _x1.T
@@ -910,6 +921,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
     "'Transpose3d' will be removed in a future version. Use 'MatMul2d' instead.",
     category=PAIBoxDeprecationWarning,
 )
+@set_rt_mode(1, 1, 1)
 class Transpose3d(TransposeModule):
     def __init__(
         self,
@@ -940,7 +952,7 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: SpikeType, **kwargs) -> SpikeType:
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         _x1 = x1.reshape(self.shape_in)
 
         return _x1.transpose(self.axes)
@@ -971,7 +983,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 def _spike_func_sadd_ssub(
     vjt: VoltageType, pos_thres: int, reset_v: Optional[int] = None
-) -> tuple[SpikeType, VoltageType]:
+) -> tuple[NeuOutType, VoltageType]:
     """Function `spike_func()` in spiking addition & subtraction."""
     # Fire
     thres_mode = np.where(
@@ -986,14 +998,14 @@ def _spike_func_sadd_ssub(
         v_reset = np.where(thres_mode == TM.EXCEED_POSITIVE, reset_v, vjt)
 
     # Spike
-    spike = np.equal(thres_mode, TM.EXCEED_POSITIVE)
+    spike = thres_mode == TM.EXCEED_POSITIVE
 
-    return spike, v_reset
+    return spike.astype(NEUOUT_U8_DTYPE), v_reset
 
 
 def _spike_func_avg_pool(
     vjt: VoltageType, pos_thres: int
-) -> tuple[SpikeType, VoltageType]:
+) -> tuple[NeuOutType, VoltageType]:
     """Function `spike_func()` in spiking addition & subtraction."""
     # Fire
     thres_mode = np.where(
@@ -1001,18 +1013,18 @@ def _spike_func_avg_pool(
         TM.EXCEED_POSITIVE,
         np.where(vjt < 0, TM.EXCEED_NEGATIVE, TM.NOT_EXCEEDED),
     )
-    spike = np.equal(thres_mode, TM.EXCEED_POSITIVE)
+    spike = thres_mode == TM.EXCEED_POSITIVE
     # Reset
     v_reset = np.where(thres_mode == TM.EXCEED_POSITIVE, 0, vjt)
 
-    return spike, v_reset
+    return spike.astype(NEUOUT_U8_DTYPE), v_reset
 
 
 def _sum_inputs_sadd_ssub(
-    x1: SpikeType, x2: SpikeType, f1: int, f2: int, vjt_pre: VoltageType, strict: bool
+    x1: NeuOutType, x2: NeuOutType, f1: int, f2: int, vjt_pre: VoltageType, strict: bool
 ) -> VoltageType:
     """Function `sum_input()` for spiking addition & subtraction."""
-    incoming_v = (vjt_pre + x1 * f1 + x2 * f2).astype(np.int32)
+    incoming_v = (vjt_pre + x1 * f1 + x2 * f2).astype(VOLTAGE_DTYPE)
     return vjt_overflow(incoming_v, strict)
 
 
@@ -1029,7 +1041,7 @@ def _shape_check(shape: tuple[int, ...], ndim: int) -> tuple[int, ...]:
 _shape_ndim3_check = partial(_shape_check, ndim=3)
 
 
-def _transpose2d_mapping(op_shape: tuple[int, ...]) -> NDArray[np.bool_]:
+def _transpose2d_mapping(op_shape: tuple[int, ...]) -> WeightType:
     """Get the mapping matrix for transpose of 2d array.
 
     Argument:
@@ -1048,7 +1060,7 @@ def _transpose2d_mapping(op_shape: tuple[int, ...]) -> NDArray[np.bool_]:
 
 def _transpose3d_mapping(
     op_shape: tuple[int, ...], axes: tuple[int, ...]
-) -> NDArray[np.bool_]:
+) -> WeightType:
     """Get the mapping matrix for transpose of 3d array.
 
     Argument:
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 64f114e8..6d879593 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -3,16 +3,24 @@
 from collections import deque
 from collections.abc import Sequence
 from dataclasses import dataclass, field
-from typing import ClassVar, Optional, Union
+from typing import ClassVar, Literal, Optional, TypeVar, Union
 
 import numpy as np
-from paicorelib import TM, HwConfig
+from paicorelib import (
+    InputWidthFormat,
+    SpikeWidthFormat,
+    TM,
+    HwConfig,
+    SNNModeEnable,
+    get_core_mode,
+)
 
 from paibox.base import NeuDyn
 from paibox.exceptions import NotSupportedError, RegisterError, ShapeError
-from paibox.types import SpikeType, VoltageType
+from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
+from .neuron.utils import _input_width_format, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
@@ -28,7 +36,7 @@
 
 __all__ = ["BuildingModule"]
 
-MultiInputsType: TypeAlias = list[SpikeType]  # Type of inputs of `NeuModule`.
+MultiInputsType: TypeAlias = list[NeuOutType]  # Type of inputs of `NeuModule`.
 BuiltComponentType: TypeAlias = list[Union["FullConnectedSyn", "Neuron"]]
 
 
@@ -85,10 +93,13 @@ def n_output(self) -> int:
 class NeuModule(NeuDyn, BuildingModule):
     __gh_build_ignore__ = True
 
-    n_return: ClassVar[int]
+    n_return: ClassVar[int] = 1
     """#N of outputs."""
     inherent_delay: int = 0
     """Internal delay of the module, relative to the external."""
+    input_width: ClassVar[InputWidthFormat] = InputWidthFormat.WIDTH_1BIT
+    spike_width: ClassVar[SpikeWidthFormat] = SpikeWidthFormat.WIDTH_1BIT
+    snn_en: ClassVar[SNNModeEnable] = SNNModeEnable.ENABLE
 
     def __init__(
         self,
@@ -177,25 +188,30 @@ def __init__(
 
         super().__init__(**kwargs, name=name)
 
+        self.mode = get_core_mode(self.input_width, self.spike_width, self.snn_en)
         self.keep_shape = keep_shape
         self._shape_out = shape_out
         self.register_operand(*operands)
 
         # Set memory for only 1 output node.
         # TODO how to handle with more than 1 output nodes
-        self.set_memory("_inner_spike", np.zeros((self.num_out,), dtype=np.bool_))
+        self.set_memory("_neu_out", np.zeros((self.num_out,), dtype=NEUOUT_U8_DTYPE))
         # Delay registers
         self.set_memory(
             "delay_registers",
             np.zeros(
-                (HwConfig.N_TIMESLOT_MAX,) + self._inner_spike.shape, dtype=np.bool_
+                (HwConfig.N_TIMESLOT_MAX,) + self._neu_out.shape, dtype=NEUOUT_U8_DTYPE
             ),
         )
         # Set a deque for the `synin` to implement the delay of `inherent_delay` for the module.
         if self.inherent_delay > 0:
             _init_synin = [
                 self.n_op
-                * [np.zeros(self.module_intf.operands[0].num_out, dtype=np.bool_)]
+                * [
+                    np.zeros(
+                        self.module_intf.operands[0].num_out, dtype=NEUOUT_U8_DTYPE
+                    )
+                ]
             ]
         else:
             _init_synin = []
@@ -211,32 +227,32 @@ def get_inputs(self) -> None:
             # Retrieve the spike at index `timestamp` of the dest neurons
             if self.is_working():
                 if isinstance(op, InputProj):
-                    synin.append(op.output.copy())
+                    synin.append(op.output)
                 else:
                     idx = self.timestamp % HwConfig.N_TIMESLOT_MAX
-                    synin.append(op.output[idx].copy())
+                    synin.append(op.delay_registers[idx])
             else:
                 # Retrieve 0 to the dest neurons if it is not working
                 synin.append(np.zeros_like(op.spike))
 
         self.synin_deque.append(synin)  # Append to the right of the deque.
 
-    def update(self, *args, **kwargs) -> Optional[SpikeType]:
+    def update(self, *args, **kwargs) -> Optional[NeuOutType]:
         if not self.is_working():
-            self._inner_spike = np.zeros((self.num_out,), dtype=np.bool_)
+            self._neu_out.fill(0)
             return None
 
         self.get_inputs()
 
         if self.is_outputing():
             synin = self.synin_deque.popleft()  # Pop the left of the deque.
-            self._inner_spike = self.spike_func(*synin).ravel()
+            self._neu_out = self.spike_func(*synin).ravel()
             idx = (
                 self.timestamp - self.inherent_delay + self.delay_relative - 1
             ) % HwConfig.N_TIMESLOT_MAX
-            self.delay_registers[idx] = self._inner_spike.copy()
+            self.delay_registers[idx] = self._neu_out.copy()
 
-        return self._inner_spike
+        return self._neu_out
 
     def _rebuild_out_intf(
         self,
@@ -275,16 +291,16 @@ def num_out(self) -> int:
         return shape2num(self._shape_out)
 
     @property
-    def output(self) -> SpikeType:
+    def output(self) -> NeuOutType:
         return self.delay_registers
 
     @property
-    def spike(self) -> SpikeType:
-        return self._inner_spike
+    def spike(self) -> NeuOutType:
+        return self._neu_out
 
     @property
-    def feature_map(self) -> SpikeType:
-        return self._inner_spike.reshape(self.varshape)
+    def feature_map(self) -> NeuOutType:
+        return self._neu_out.reshape(self.varshape)
 
     @property
     def varshape(self) -> tuple[int, ...]:
@@ -383,11 +399,13 @@ def __init__(
 
     def synaptic_integr(self, *args, **kwargs) -> VoltageType:
         """Functions used to describe synaptic integration of the module."""
-        raise NotImplementedError
+        raise NotImplementedError(
+            "'synaptic_integr' should be implemented in the subclasses."
+        )
 
-    def update(self, *args, **kwargs) -> Optional[SpikeType]:
+    def update(self, *args, **kwargs) -> Optional[NeuOutType]:
         if not self.is_working():
-            self._inner_spike = np.zeros((self.num_out,), dtype=np.bool_)
+            self._neu_out.fill(0)
             return None
 
         self.get_inputs()
@@ -396,14 +414,14 @@ def update(self, *args, **kwargs) -> Optional[SpikeType]:
             synin = self.synin_deque.popleft()  # Pop the left of the deque.
             incoming_v = self.synaptic_integr(*synin, self._vjt)
             _is, self._vjt = self.spike_func(incoming_v)
-            self._inner_spike = _is.ravel()
+            self._neu_out = _is.ravel()
 
             idx = (
                 self.timestamp - self.inherent_delay + self.delay_relative - 1
             ) % HwConfig.N_TIMESLOT_MAX
-            self.delay_registers[idx] = self._inner_spike.copy()
+            self.delay_registers[idx] = self._neu_out.copy()
 
-        return self._inner_spike
+        return self._neu_out
 
     @property
     def voltage(self) -> VoltageType:
@@ -429,6 +447,20 @@ def __init__(
         )
 
 
+L = Literal
+_T = TypeVar("_T", bound=NeuModule)
+
+
+def set_rt_mode(input_width: L[1, 8], spike_width: L[1, 8], snn_en: L[0, 1]):
+    def wrapper(cls: type[_T]) -> type[_T]:  # class decorator: sets class-level mode attributes
+        cls.input_width = _input_width_format(input_width)
+        cls.spike_width = _spike_width_format(spike_width)
+        cls.snn_en = SNNModeEnable(snn_en)
+        return cls  # the same class object, modified in place
+
+    return wrapper
+
+
 def _shape_check2(
     neuron_a: Union[NeuDyn, InputProj],
     neuron_b: Union[NeuDyn, InputProj],

From d08a11e62d0792df88931c8599379a07e4d28afa Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 5 Jul 2024 14:35:05 +0800
Subject: [PATCH 007/187] =?UTF-8?q?=E2=9C=85=20skip=20the=20tests=20of=20d?=
 =?UTF-8?q?eprecated=20fmodules?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 7b4b4530..9913fb11 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -551,6 +551,7 @@ def test_SpikingPool2dWithV_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     @pytest.mark.parametrize("shape", [(32, 16), (1, 32), (64,), (128, 1), 48])
     def test_Transpose2d(self, shape):
         from tests.shared_networks import TransposeModule_T2d_Net
@@ -581,6 +582,7 @@ def test_Transpose2d(self, shape):
             expected = inpa[i - 2].T.ravel()
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
 
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     def test_Transpose2d_mapping(self, ensure_dump_dir):
         from tests.shared_networks import TransposeModule_T2d_Net
 
@@ -591,6 +593,7 @@ def test_Transpose2d_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     @pytest.mark.parametrize(
         "shape, axes",
         [
@@ -632,6 +635,7 @@ def test_Transpose3d(self, shape, axes):
             expected = inpa[i - 2].transpose(axes).ravel()
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
 
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     def test_Transpose3d_mapping(self, ensure_dump_dir):
         from tests.shared_networks import TransposeModule_T3d_Net
 

From 43369c2f44e87004d1b3be23761917880ef1b38f Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 8 Jul 2024 17:26:50 +0800
Subject: [PATCH 008/187] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Unify=20weight=20d?=
 =?UTF-8?q?ata=20types=20&=20optimize=20weight=20precision=20processing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🎨 update typing
---
 paibox/backend/placement.py                  | 20 ++---
 paibox/components/synapses/transforms.py     | 51 ++++++-----
 paibox/types.py                              |  5 +-
 tests/backend/conftest.py                    | 12 +--
 tests/backend/test_mapper.py                 |  8 +-
 tests/components/synapses/test_synapses.py   | 37 ++++----
 tests/components/synapses/test_transforms.py | 89 +++++++++-----------
 7 files changed, 105 insertions(+), 117 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index d4e6554e..809b3eb8 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -8,7 +8,7 @@
 
 from paibox.components import FullConnectedSyn, Neuron
 from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning
-from paibox.types import WeightType
+from paibox.types import WeightType, WEIGHT_DTYPE
 from paibox.utils import check_attr_same, count_unique_elem
 
 from .conf_template import (
@@ -313,9 +313,11 @@ def raw_weight_of_dest(self) -> list[WeightType]:
                     w_of_dest.append(syn.connectivity)
                 else:
                     # Fill with 0.
-                    w_of_dest.append(np.zeros((s.num_out, d.num_in), dtype=np.int8))
+                    w_of_dest.append(
+                        np.zeros((s.num_out, d.num_in), dtype=WEIGHT_DTYPE)
+                    )
 
-            w_dest = np.vstack(w_of_dest, dtype=np.int8)
+            w_dest = np.vstack(w_of_dest)
             w_of_neurons.append(w_dest)
 
         # Check
@@ -430,9 +432,8 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
         n_fold = self.n_timeslot
 
         if self.lcn_ex == LCN_EX.LCN_1X:
-            w_folded = np.hstack(raw_weights, dtype=np.int8)
+            w_folded = np.hstack(raw_weights)
             w_folded.setflags(write=False)
-
             return w_folded
 
         # LCN_EX > LCN_1X
@@ -455,12 +456,11 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
                 )
                 w_folded_of_axon_segs.append(w_folded_of_axon_seg)
 
-            w_folded = np.vstack(w_folded_of_axon_segs, dtype=np.int8)
+            w_folded = np.vstack(w_folded_of_axon_segs)
             w_folded_list.append(w_folded)
 
-        w_folded = np.hstack(w_folded_list, dtype=np.int8)
+        w_folded = np.hstack(w_folded_list)
         w_folded.setflags(write=False)
-
         return w_folded
 
     def _weight_ram_mapping(self) -> WeightRamType:
@@ -528,7 +528,7 @@ def _nfold_weight(
 
             _raw_weight = np.append(
                 raw_weight,
-                np.zeros((n_row_padding, raw_col), dtype=np.int8),
+                np.zeros((n_row_padding, raw_col), dtype=WEIGHT_DTYPE),
                 axis=0,
             )
         else:
@@ -539,7 +539,7 @@ def _nfold_weight(
         # Check #2
         # assert _raw_weight.shape[0] == expected_row * n_fold
 
-        w_folded = np.zeros((expected_row, raw_col * n_fold), dtype=np.int8)
+        w_folded = np.zeros((expected_row, raw_col * n_fold), dtype=WEIGHT_DTYPE)
 
         for i, j in np.ndindex((n_fold, raw_col)):
             w_col = w_splited[i][:, j]
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 64a6b8ce..78807f2d 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -7,12 +7,13 @@
 
 from paibox.exceptions import AutoOptimizationWarning, ShapeError
 from paibox.types import (
+    WEIGHT_DTYPE,
     DataArrayType,
     IntScalarType,
     NeuOutType,
     SynOutType,
     WeightType,
-    VOLTAGE_DTYPE,
+    VOLTAGE_DTYPE
 )
 from paibox.utils import is_shape, shape2num, typical_round
 
@@ -70,7 +71,7 @@ def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
     """Convert raw weights to `np.ndarray` coarsely (without optimization).
 
     Description:
-        - For weights of type `bool` or `np.bool_`, set `np.bool_` as the dtype.
+        - For weights of type `bool` or `np.bool_`, set `np.int8` as the dtype.
         - For integer scalar weight, set the dtype according to its value.
         - For array weights, set the dtype according to its minimum & maximum values. For weights in the\
             range of int8, the dtype when declared will be followed (i.e. not optimized).
@@ -83,12 +84,7 @@ def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
         if raw_w > MAX_INT8 or raw_w < MIN_INT8:
             raise ValueError(f"weight out of range int8, got {raw_w}.")
 
-        if raw_w <= MAX_INT1 and raw_w >= MIN_INT1:
-            _dtype = np.bool_
-        else:
-            _dtype = np.int8
-
-        return np.asarray(raw_w, dtype=_dtype)
+        return np.asarray(raw_w, dtype=WEIGHT_DTYPE)
 
     # Convert list or tuple to np.ndarray
     _array = np.asarray(raw_w)
@@ -103,10 +99,10 @@ def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
             f"dtype of weight is optimized automatically, {_array.dtype} -> int8.",
             AutoOptimizationWarning,
         )
-        _dtype = np.int8
+        _dtype = WEIGHT_DTYPE
 
     elif _array.dtype == np.bool_ or _array.dtype == np.int8:
-        _dtype = _array.dtype
+        _dtype = WEIGHT_DTYPE
     else:
         raise TypeError(f"weights must be bool or int8, but got {_array.dtype}.")
 
@@ -128,17 +124,13 @@ def _get_weight_precision(weight: WeightType, enable_wp_opt: bool) -> WP:
         else:
             return WP.WEIGHT_WIDTH_8BIT
     else:
-        # If weight precision opt is disabled, return WP1 if dtype is np.bool_ else WP8.
-        if weight.dtype == np.bool_:
-            return WP.WEIGHT_WIDTH_1BIT
-        else:
-            return WP.WEIGHT_WIDTH_8BIT
+        return WP.WEIGHT_WIDTH_8BIT
 
 
 class Transform:
     def __init__(self, weights: DataArrayType) -> None:
         self.weights = _set_coarse_dtype(weights)
-        """The actual weights in synapses. Stored in `np.bool_` or `np.int8` format."""
+        """The actual weights in synapses. Stored in np.int8 format."""
 
         self.weights.setflags(write=False)
 
@@ -195,7 +187,7 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
     @property
     def connectivity(self):
         return (
-            (self.weights * np.identity(self.num, dtype=np.bool_))
+            (self.weights * np.identity(self.num, dtype=WEIGHT_DTYPE))
             if self.weights.ndim == 0
             else np.diag(self.weights)
         )
@@ -259,7 +251,7 @@ def connectivity(self):
         return (
             self.weights
             if self.weights.ndim == 2
-            else (self.weights * np.ones(self.conn_size, dtype=np.bool_))
+            else (self.weights * np.ones(self.conn_size, dtype=WEIGHT_DTYPE))
         )
 
 
@@ -305,7 +297,7 @@ def _matmul_unroll(
         n_oshape = shape2num(out_shape)
         in_shape_t = tuple(in_shape[i] for i in axes)
 
-        w_unrolled = np.zeros((n_ishape, n_oshape), dtype=weights.dtype)
+        w_unrolled = np.zeros((n_ishape, n_oshape), dtype=WEIGHT_DTYPE)
 
         orig_idx = np.arange(n_ishape).reshape(in_shape_t)
         mapping_tbl = orig_idx.transpose(np.argsort(axes)).ravel()
@@ -560,3 +552,24 @@ def connectivity(self):
             self.stride,
             self.padding,
         )
+
+
+class _CompareMax(AllToAll):
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
+        """For each output column of the weight matrix, the maximum of the inputs at that column's \
+            non-zero weight rows is taken as the output (a scalar weight uses all inputs).
+            x = (x1, x2, ..., xn)
+            w = [n*m]
+            y = (y1, y2, ..., ym)
+        """
+        if self.weights.ndim == 0:
+            output = np.full(
+                (self.conn_size[1],), np.max(x, axis=None), dtype=VOLTAGE_DTYPE
+            )
+        else:
+            output = np.zeros((self.conn_size[1],), dtype=VOLTAGE_DTYPE)
+            for col in range(self.conn_size[1]):
+                non_zero_idx = np.nonzero(self.weights[:, col])[0]
+                output[col] = np.max(x[non_zero_idx])
+
+        return output
diff --git a/paibox/types.py b/paibox/types.py
index 4392684e..f5a90365 100644
--- a/paibox/types.py
+++ b/paibox/types.py
@@ -1,5 +1,5 @@
 import sys
-from typing import TypeVar, Union
+from typing import TypeVar
 
 import numpy as np
 from numpy.typing import NDArray
@@ -21,6 +21,7 @@
 LEAK_V_DTYPE = np.int32
 SPIKE_DTYPE = np.bool_
 VOLTAGE_DTYPE = np.int32
+WEIGHT_DTYPE = np.int8
 NEUOUT_SPIKE_DTYPE = np.bool_
 NEUOUT_U8_DTYPE = np.uint8
 
@@ -29,4 +30,4 @@
 SynOutType: TypeAlias = NDArray[VOLTAGE_DTYPE]
 VoltageType: TypeAlias = NDArray[VOLTAGE_DTYPE]
 NeuOutType: TypeAlias = NDArray[NEUOUT_U8_DTYPE]
-WeightType: TypeAlias = NDArray[Union[np.bool_, np.int8]]
+WeightType: TypeAlias = NDArray[WEIGHT_DTYPE]
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index cf27ad13..7d1285ce 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -1282,55 +1282,48 @@ class TestData:
     )
 
     cflags_weight_bit_opt_data = ParametrizedTestData(
-        args="range, scalar, dtype, expected_wp_noopt, expected_wp_opt",
+        args="range, scalar, dtype, expected_wp_opt",
         data=[
             (
                 ((0, 2), (0, 2)),
                 1,
                 (np.bool_, np.bool_),
                 WP.WEIGHT_WIDTH_1BIT,
-                WP.WEIGHT_WIDTH_1BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 -1,
                 (np.bool_, np.bool_),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_2BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 1,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_1BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 -2,
                 (np.int8, np.bool_),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_2BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 1,
                 (np.int8, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_1BIT,
             ),
             (
                 ((0, 2), (-2, 2)),
                 -8,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_4BIT,
             ),
             (
                 ((0, 2), (-2, 2)),
                 7,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_4BIT,
             ),
             (
@@ -1338,13 +1331,11 @@ class TestData:
                 127,
                 (np.bool_, np.int8),
                 WP.WEIGHT_WIDTH_8BIT,
-                WP.WEIGHT_WIDTH_8BIT,
             ),
             (
                 ((-2, 2), (-8, 8)),
                 7,
                 (np.int8, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
                 WP.WEIGHT_WIDTH_4BIT,
             ),
             (
@@ -1352,7 +1343,6 @@ class TestData:
                 -100,
                 (np.int8, np.int8),
                 WP.WEIGHT_WIDTH_8BIT,
-                WP.WEIGHT_WIDTH_8BIT,
             ),
         ],
     )
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 29b14572..21f3cabbc 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pytest
-from paicorelib import Coord, HwConfig
+from paicorelib import Coord, HwConfig, WeightPrecision as WP
 
 import paibox as pb
 from paibox.base import SynSys
@@ -542,9 +542,7 @@ class TestMapper_cflags:
         TestData.cflags_weight_bit_opt_data["args"],
         TestData.cflags_weight_bit_opt_data["data"],
     )
-    def test_cflags_weight_bit_opt(
-        self, range, scalar, dtype, expected_wp_noopt, expected_wp_opt
-    ):
+    def test_cflags_weight_bit_opt(self, range, scalar, dtype, expected_wp_opt):
         # s1, s2, s3 will be grouped in one core block.
         class Net(pb.Network):
             def __init__(self):
@@ -578,7 +576,7 @@ def __init__(self):
         mapper = pb.Mapper()
         mapper.build(net)
         mapper.compile(weight_bit_optimization=False)
-        assert mapper.core_blocks[0].weight_precision == expected_wp_noopt
+        assert mapper.core_blocks[0].weight_precision == WP.WEIGHT_WIDTH_8BIT
 
         mapper.clear()
         mapper.build(net)
diff --git a/tests/components/synapses/test_synapses.py b/tests/components/synapses/test_synapses.py
index 7e9b805d..6378b4de 100644
--- a/tests/components/synapses/test_synapses.py
+++ b/tests/components/synapses/test_synapses.py
@@ -7,6 +7,7 @@
 import paibox as pb
 from paibox.components import FullConnectedSyn
 from paibox.exceptions import RegisterError, ShapeError
+from paibox.types import WEIGHT_DTYPE
 from paibox.utils import shape2num
 
 
@@ -127,13 +128,9 @@ def test_FullConn_One2One_scalar(self, n1, n2, scalar_weight, expected_wp):
         assert (s1.num_in, s1.num_out) == (n1.num_out, n2.num_in)
         assert np.array_equal(
             s1.connectivity,
-            scalar_weight * np.identity(n1.num_out, dtype=np.int8),
-        )
-        assert (
-            s1.connectivity.dtype == np.int8
-            if expected_wp > WP.WEIGHT_WIDTH_1BIT
-            else np.bool_
+            scalar_weight * np.identity(n1.num_out, dtype=WEIGHT_DTYPE),
         )
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.weight_precision is expected_wp
 
     @pytest.mark.parametrize(
@@ -160,7 +157,7 @@ def test_FullConn_One2One_matrix(self):
         assert np.array_equal(
             s1.connectivity, np.array([[2, 0, 0], [0, 3, 0], [0, 0, 4]], dtype=np.int8)
         )
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.weight_precision is WP.WEIGHT_WIDTH_4BIT
 
         weight = np.array([1, 0, 1, 0], np.int8)
@@ -173,10 +170,10 @@ def test_FullConn_One2One_matrix(self):
         assert np.array_equal(
             s2.connectivity,
             np.array(
-                [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]], dtype=np.bool_
+                [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]], dtype=np.int16
             ),
         )
-        assert s2.connectivity.dtype == np.int8
+        assert s2.connectivity.dtype == WEIGHT_DTYPE
         assert s2.weight_precision is WP.WEIGHT_WIDTH_1BIT
 
     @pytest.mark.parametrize(
@@ -193,7 +190,7 @@ def test_FullConn_All2All(self, n1, n2):
         s1 = pb.FullConn(n1, n2, conn_type=pb.SynConnType.All2All)
 
         assert (s1.num_in, s1.num_out) == (n1.num_out, n2.num_in)
-        assert s1.connectivity.dtype == np.bool_
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert np.array_equal(s1.weights, 1)
         assert np.array_equal(s1.connectivity, np.ones((n1.num_out, n2.num_in)))
 
@@ -206,14 +203,14 @@ def test_FullConn_All2All_with_weights(self):
         s1 = pb.FullConn(n1, n2, weight, conn_type=pb.SynConnType.All2All)
 
         assert np.array_equal(s1.weights, weight)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.weight_precision is WP.WEIGHT_WIDTH_4BIT
 
         """2. Weights matrix."""
         weight = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
         s2 = pb.FullConn(n1, n2, weight, conn_type=pb.SynConnType.All2All)
 
-        assert s2.connectivity.dtype == np.int8
+        assert s2.connectivity.dtype == WEIGHT_DTYPE
         assert np.array_equal(s2.weights, weight)
         assert np.array_equal(s2.connectivity, weight)
 
@@ -266,7 +263,7 @@ def test_MatMul2d_instance(self, n1, n2, w_shape, expectation):
             s = pb.MatMul2d(n1, n2, weights=weights)
 
             assert (s.num_in, s.num_out) == (n1.num_out, n2.num_in)
-            assert s.connectivity.dtype == np.int8
+            assert s.connectivity.dtype == WEIGHT_DTYPE
             assert np.array_equal(s.weights, weights)
 
 
@@ -292,7 +289,7 @@ def test_Conv1d_instance(self):
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
@@ -320,7 +317,7 @@ def test_Conv2d_instance(self):
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
@@ -344,7 +341,7 @@ def test_Conv1d_inchannel_omitted(self):
         s1 = pb.Conv1d(n1, n2, weight, stride=stride, kernel_order=korder)
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
@@ -403,7 +400,7 @@ def test_ConvTranspose1d_instance(self):
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
@@ -437,7 +434,7 @@ def test_ConvTranspose2d_instance(self):
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
@@ -471,7 +468,7 @@ def test_ConvTranspose1d_inchannel_omitted(self):
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
@@ -505,7 +502,7 @@ def test_ConvTranspose2d_inchannel_omitted(self):
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
-        assert s1.connectivity.dtype == np.int8
+        assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
             in_channels * shape2num(in_shape),
             out_channels * shape2num(out_shape),
diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index 9b7856ea..d94e21b9 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -3,48 +3,49 @@
 
 from paibox.components.synapses import transforms as tfm
 from paibox.exceptions import AutoOptimizationWarning
+from paibox.types import WEIGHT_DTYPE
 from paibox.utils import shape2num
 
 
 class TestTransforms:
     @pytest.mark.parametrize(
-        "weight, expected_dtype",
+        "weight",
         [
-            (np.array([1, 2, 3], dtype=np.int8), np.int8),
-            (np.array([1, 0, 1], dtype=np.bool_), np.bool_),
-            (np.array([True, False]), np.bool_),
-            (np.array([True, False], dtype=np.int8), np.int8),
-            (10, np.int8),
-            (1, np.bool_),
-            (True, np.bool_),
-            (np.int8(1), np.bool_),  # automatically optimizated
-            (np.uint8(99), np.int8),
-            (np.array([-128, 1, 127], dtype=np.int8), np.int8),
-            ([1, 2, 3], np.int8),
-            ((0, 1, 0, 1), np.int8),
+            np.array([1, 2, 3], dtype=np.int8),
+            np.array([1, 0, 1], dtype=np.bool_),
+            np.array([True, False]),
+            np.array([True, False], dtype=np.int8),
+            10,
+            1,
+            True,
+            np.int8(1),  # automatically optimized
+            np.uint8(99),
+            np.array([-128, 1, 127], dtype=np.int8),
+            [1, 2, 3],
+            (0, 1, 0, 1),
         ],
     )
-    def test_weight_dtype_convert(self, weight, expected_dtype):
+    def test_weight_dtype_convert(self, weight):
         t = tfm.Transform(weight)
-        assert t.weights.dtype == expected_dtype
+        assert t.weights.dtype == WEIGHT_DTYPE
 
     @pytest.mark.parametrize(
-        "weight, expected_dtype",
+        "weight",
         [
-            (np.array([1, 2, 3]), np.int8),
+            np.array([1, 2, 3]),
             # Only automatically optimized to int8 unless specified as bool
-            (np.array([True, False], dtype=np.int16), np.int8),
-            (np.array([1, 0, 1], dtype=np.int16), np.int8),  # Same as above
-            (np.array([-128, 1, 127], dtype=np.int32), np.int8),
-            (np.array([-8, 4, 7]), np.int8),
-            ([-100, 0, 100], np.int8),
+            np.array([True, False], dtype=np.int16),
+            np.array([1, 0, 1], dtype=np.int16),  # Same as above
+            np.array([-128, 1, 127], dtype=np.int32),
+            np.array([-8, 4, 7]),
+            [-100, 0, 100],
         ],
     )
-    def test_weight_dtype_convert_warning(self, weight, expected_dtype):
+    def test_weight_dtype_convert_warning(self, weight):
         with pytest.warns(AutoOptimizationWarning):
             t = tfm.Transform(weight)
 
-        assert t.weights.dtype == expected_dtype
+        assert t.weights.dtype == WEIGHT_DTYPE
 
     @pytest.mark.parametrize(
         "weight",
@@ -111,15 +112,8 @@ def test_OneToOne(self):
         assert y.shape == (4,)
 
     @pytest.mark.parametrize(
-        "weight, expected_dtype",
-        [
-            (1, np.bool_),
-            (-1, np.int8),
-            (10, np.int8),
-            (-100, np.int8),
-            (-128, np.int8),
-            (127, np.int8),
-        ],
+        "weight",
+        [1, -1, 10, -100, -128, 127],
         ids=[
             "scalar_1",
             "scalar_-1",
@@ -129,7 +123,7 @@ def test_OneToOne(self):
             "scalar_-127",
         ],
     )
-    def test_AllToAll_weight_scalar(self, weight, expected_dtype):
+    def test_AllToAll_weight_scalar(self, weight):
         """Test `AllToAll` when weight is a scalar"""
 
         num_in, num_out = 10, 20
@@ -138,7 +132,7 @@ def test_AllToAll_weight_scalar(self, weight, expected_dtype):
         y = f(x)
         expected = np.full((num_out,), np.sum(x, axis=None), dtype=np.int32) * weight
 
-        assert f.connectivity.dtype == expected_dtype
+        assert f.connectivity.dtype == WEIGHT_DTYPE
         assert y.dtype == np.int32
         assert y.shape == (num_out,)
         assert y.ndim == 1
@@ -146,37 +140,32 @@ def test_AllToAll_weight_scalar(self, weight, expected_dtype):
         assert f.connectivity.shape == (num_in, num_out)
 
     @pytest.mark.parametrize(
-        "shape, x, weights, expected_dtype",
+        "shape, x, weights",
         [
             (
                 (3, 4),
                 np.random.randint(2, size=(3,), dtype=np.bool_),
                 np.random.randint(2, size=(3, 4), dtype=np.bool_),
-                np.bool_,
             ),
             (
                 (10, 20),
                 np.random.randint(2, size=(10,), dtype=np.bool_),
                 np.random.randint(127, size=(10, 20), dtype=np.int8),
-                np.int8,
             ),
             (
                 (20, 10),
                 np.random.randint(2, size=(20,), dtype=np.bool_),
                 np.random.randint(2, size=(20, 10), dtype=np.bool_),
-                np.bool_,
             ),
             (
                 (2, 2),
                 np.array([1, 1], dtype=np.bool_),
                 np.array([[1, 2], [3, 4]], dtype=np.int8),
-                np.int8,
             ),
             (
                 (2, 2),
                 np.array([1, 1], dtype=np.bool_),
                 np.array([[127, 0], [3, -128]], dtype=np.int8),
-                np.int8,
             ),
         ],
         ids=[
@@ -187,43 +176,43 @@ def test_AllToAll_weight_scalar(self, weight, expected_dtype):
             "weights_int8_4",
         ],
     )
-    def test_AllToAll_array(self, shape, x, weights, expected_dtype):
+    def test_AllToAll_array(self, shape, x, weights):
         """Test `AllToAll` when weights is an array"""
 
         f = tfm.AllToAll(shape, weights)
         y = f(x)
         expected = x @ weights.copy().astype(np.int32)
 
-        assert f.connectivity.dtype == expected_dtype
+        assert f.connectivity.dtype == WEIGHT_DTYPE
         assert np.array_equal(y, expected)
         assert f.connectivity.shape == shape
 
     @pytest.mark.parametrize(
-        "x, weights, expected_dtype",
+        "x, weights",
         [
             (
                 np.arange(12, dtype=np.int8).reshape(3, 4),
                 np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=np.int8),
-                np.int8,
             ),
             (
                 np.random.randint(2, size=(10,), dtype=np.bool_),
                 np.random.randint(-10, 10, size=(10, 20), dtype=np.int8),
-                np.int8,
             ),
             (
                 np.ones((20, 10), dtype=np.bool_),
                 np.random.randint(2, size=(20, 10), dtype=np.bool_),
-                np.bool_,
             ),
             (
                 np.array((1, 1), dtype=np.bool_),
                 np.array([[127, 0], [3, -128]], dtype=np.int8),
-                np.int8,
             ),
         ],
     )
-    def test_MaskedLinear(self, x, weights, expected_dtype):
+    def test_MaskedLinear(
+        self,
+        x,
+        weights,
+    ):
         if x.ndim == 1:
             in_shape = (1, x.shape[0])
         else:
@@ -242,7 +231,7 @@ def test_MaskedLinear(self, x, weights, expected_dtype):
         y2 = x.flatten() @ f.connectivity.astype(np.int32)
         expected = x.reshape(in_shape).transpose(axes) @ weights.copy().astype(np.int32)
 
-        assert f.connectivity.dtype == expected_dtype
+        assert f.connectivity.dtype == WEIGHT_DTYPE
         assert y.shape == oshape
         assert y2.dtype == np.int32
         assert np.array_equal(y, expected)

From 17a106053b78bd550d3b2cb6ac7da329166ccae4 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 10 Jul 2024 09:56:38 +0800
Subject: [PATCH 009/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20bit=20r?=
 =?UTF-8?q?eversal=20functions=20&=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_template.py |  6 ++----
 paibox/utils.py                 | 21 +++++++++++++--------
 tests/test_utils.py             | 26 +++++++++++++++++++-------
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index 8bb7baf5..080f8fba 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -36,10 +36,8 @@
 else:
     from typing_extensions import TypeAlias
 
-from typing_extensions import NotRequired
-
 from paibox.components import Neuron
-from paibox.utils import bit_reversal
+from paibox.utils import reverse_8bit
 
 from .context import _BACKEND_CONTEXT
 from .types import AxonCoord, NeuSegment, NodeName
@@ -603,7 +601,7 @@ def to_clk_en_L2_u8(L2_inchip: list[RoutingCoord]) -> list[int]:
         for _ in range(8):
             u8 = bitmap & _mask(8)
             bitmap >>= 8
-            clk_en.append(bit_reversal(u8))
+            clk_en.append(reverse_8bit(u8))
 
         return clk_en
 
diff --git a/paibox/utils.py b/paibox/utils.py
index cbf6c17c..08c9ec59 100644
--- a/paibox/utils.py
+++ b/paibox/utils.py
@@ -143,14 +143,19 @@ def typical_round(n: float) -> int:
         return int(n) + 1
 
 
-def bit_reversal(uint: int, n_bit: int = 8) -> int:
-    """Reverse the bit order of a N-bit unsigned integer, where N is `n_bit`."""
-    reversed = 0
-    for i in range(n_bit):
-        if (uint >> i) & 1:
-            reversed += 1 << (n_bit - 1 - i)
-
-    return reversed
+def reverse_8bit(x: int) -> int:
+    """Reverse the bit order of 8-bit unsigned integer."""
+    x = ((x & 0xAA) >> 1) | ((x & 0x55) << 1)
+    x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2)
+    x = ((x & 0xF0) >> 4) | ((x & 0x0F) << 4)
+    return x
+
+
+def reverse_16bit(x: int) -> int:
+    x = ((x & 0xAAAA) >> 1) | ((x & 0x5555) << 1)
+    x = ((x & 0xCCCC) >> 2) | ((x & 0x3333) << 2)
+    x = ((x & 0xF0F0) >> 4) | ((x & 0x0F0F) << 4)
+    return ((x >> 8) | (x << 8)) & 0xFFFF
 
 
 def arg_check_pos(arg: int, desc: Optional[str] = None) -> int:
diff --git a/tests/test_utils.py b/tests/test_utils.py
index faa17e2e..c482a099 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,6 @@
 import pytest
 
-from paibox.utils import bit_reversal, fn_sgn, typical_round
+from paibox.utils import reverse_8bit, reverse_16bit, fn_sgn, typical_round
 
 
 @pytest.mark.parametrize("a,b, expected", [(1, 0, 1), (1, 2, -1), (3, 3, 0)])
@@ -16,12 +16,24 @@ def test_typical_round(n, expected):
 
 
 @pytest.mark.parametrize(
-    "uint, n_bit, expected",
+    "x, expected",
     [
-        (0b10110, 5, 0b01101),
-        (0b0111_0111_1001_1001, 10, 0b1001_1001_11),
-        (0b1010_1100_1101, 7, 0b1011_001),
+        (0b1001_0110, 0b0110_1001),
+        (0b0001_1001, 0b1001_1000),
+        (0b1100_1101, 0b1011_0011),
     ],
 )
-def test_bit_reversal(uint, n_bit, expected):
-    assert bit_reversal(uint, n_bit) == expected
+def test_reverse_8bit(x, expected):
+    assert reverse_8bit(x) == expected
+
+
+@pytest.mark.parametrize(
+    "x, expected",
+    [
+        (0b0110_0001_1001_0111, 0b1110_1001_1000_0110),
+        (0b1110_0011_0001_1001, 0b1001_1000_1100_0111),
+        (0b1100_1101_1001_1101, 0b1011_1001_1011_0011),
+    ],
+)
+def test_reverse_16bit(x, expected):
+    assert reverse_16bit(x) == expected

From d6fe959bbf7b81b1a680305ffd6d4b6fe7aa4fe3 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sun, 14 Jul 2024 13:36:12 +0800
Subject: [PATCH 010/187] =?UTF-8?q?=E2=9C=A8=20support=20finding=20axons?=
 =?UTF-8?q?=20address=20with=208-bit=20input=20width?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/segment_utils.py | 89 ++++++++++++++++++++++-----------
 1 file changed, 59 insertions(+), 30 deletions(-)

diff --git a/paibox/backend/segment_utils.py b/paibox/backend/segment_utils.py
index 083862e3..e75bd8c5 100644
--- a/paibox/backend/segment_utils.py
+++ b/paibox/backend/segment_utils.py
@@ -1,5 +1,4 @@
 import warnings
-from collections.abc import Sequence
 from functools import partial
 from math import ceil
 from typing import Literal
@@ -256,22 +255,18 @@ def get_neu_segments(
 
 
 def get_axon_segments(
-    axons: Sequence[SourceNodeType], tr_max: int, fan_in_max: int
+    axons: list[SourceNodeType], tr_max: int, n_fanin: int
 ) -> dict[SourceNodeType, AxonSegment]:
     """Divide axons into segments by group to fit the hardware constraints.
 
     Args:
-        - axons: The axons to be segmented.
-        - tr_max: The maximum value of the time slot(=n_timeslot).
-        - fan_in_max: The value of fan-in per dendrite(=N_FANIN_PER_DENDRITE_XNN).
-
-    TODO Provide an alternative when failed.
+        - axons: the axons to be segmented.
+        - tr_max: the maximum value of the time slot(n_timeslot).
+        - n_fanin: the fan-in of cores.
     """
 
-    def _seg_alloc(axon: SourceNodeType) -> AxonSegment:
+    def _seg_alloc(axon: SourceNodeType, offset: int) -> tuple[AxonSegment, int]:
         """Allocate an axon segment, return the next offset of axon address."""
-        nonlocal offset
-
         # The width of assigned address
         if axon.num_out % tr_max > 0:
             addr_width = axon.num_out // tr_max + 1
@@ -280,58 +275,92 @@ def _seg_alloc(axon: SourceNodeType) -> AxonSegment:
             addr_width = axon.num_out // tr_max
             # n_axon_rest = 0
 
-        if offset + addr_width > fan_in_max:
+        if offset + addr_width > n_fanin:
             raise ResourceError(
-                f"axons address out of range [0, {fan_in_max}) ({offset + addr_width})."
+                f"axons address out of range [0, {n_fanin}) ({offset + addr_width})."
             )
 
-        cur_offset = offset
-        offset += addr_width
-
-        return AxonSegment(axon.num_out, addr_width, cur_offset)
+        return AxonSegment(axon.num_out, addr_width, offset), offset + addr_width
 
     offset = 0
     axon_segments = dict()
 
     for axon in axons:
-        segment = _seg_alloc(axon)
+        segment, offset = _seg_alloc(axon, offset)
         axon_segments[axon] = segment
 
     return axon_segments
 
 
 def aligned_coords(
-    neu_index: NeuSlice, axon_seg: AxonSegment, delay: int, dest_n_timeslot: int
+    neu_index: NeuSlice,
+    axon_seg: AxonSegment,
+    delay: int,
+    dest_n_timeslot: int,
+    is_iw8: bool,
 ) -> list[AxonCoord]:
     """Find the axon segments aligned with the index of neuron segment.
 
-    The length of axon coordinates is the same as `neu_index`.
+    NOTE: Axons are described in a tuple (tick_relative, axon_addr). Axis 'tr' is used as the row   \
+        coordinates while axis 'axon' is used as the column coordinates.
+
+        | ------- AxonSeg[0] ------- | ------- AxonSeg[1] ------- | ...
+    tr=0 A1[0]   A1[1]   ...  A1[99]   A2[0]   A2[1]   ... A2[199]
+    tr=1 A1[100] A1[101] ... A1[199]   A2[200] A2[201] ... A2[399]
+    
+    The target axon may be Ax[100:499], where (tr=0, offset+100) is the start and (tr=2, offset+99) \
+        is the exclusive end (499 = 2 * width + 99 with width = 200).
+            offset
+              | <--------- width --------> |
+        | ... | ------- AxonSeg[x] ------- | ...
+    tr=0  ...   Ax[0]   Ax[1]   ... Ax[199]
+    tr=1  ...   Ax[200] Ax[201] ... Ax[399]
+    tr=2  ...   Ax[400] Ax[401] ... Ax[599]
+    
+    When the input width is 8 bits, each A[x] occupies 8 bits. The interval of axons is 8.
     """
-    axon_coords = []
     addr_width = axon_seg.addr_width
     addr_offset = axon_seg.addr_offset
 
     # tick_relative = n_timeslot * (delay - 1) + tr_offset (start & end)
     tr_base = dest_n_timeslot * (delay - 1)
-
     tr_offset_start, tr_offset_stop = (
         neu_index.start // addr_width,
         neu_index.stop // addr_width,
     )
     addr_start, addr_stop = (neu_index.start % addr_width, neu_index.stop % addr_width)
 
+    _addr_interval = 8 if is_iw8 else 1
+
     if tr_offset_stop == tr_offset_start:
-        for addr in range(addr_start, addr_stop):
-            axon_coords.append(AxonCoord(tr_base + tr_offset_start, addr_offset + addr))
+        axon_coords = [
+            AxonCoord(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
+            for addr in range(addr_start, addr_stop)
+        ]
     else:
-        for addr in range(addr_start, addr_width):
-            axon_coords.append(AxonCoord(tr_base + tr_offset_start, addr_offset + addr))
-
+        # First row: addr_start -> end
+        acoords_first = [
+            AxonCoord(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
+            for addr in range(addr_start, addr_width)
+        ]
+
+        # Middle rows
+        acoords_mid = []
         for tr in range(tr_offset_start + 1, tr_offset_stop):
-            for addr in range(addr_width):
-                axon_coords.append(AxonCoord(tr_base + tr, addr_offset + addr))
+            acoords_mid.extend(
+                AxonCoord(tr_base + tr, (addr_offset + addr) * _addr_interval)
+                for addr in range(addr_width)
+            )
+
+        # Last row: start -> addr_stop
+        acoords_last = [
+            AxonCoord(tr_base + tr_offset_stop, (addr_offset + addr) * _addr_interval)
+            for addr in range(addr_stop)
+        ]
 
-        for addr in range(addr_stop):
-            axon_coords.append(AxonCoord(tr_base + tr_offset_stop, addr_offset + addr))
+        axon_coords = []
+        axon_coords.extend(acoords_first)
+        axon_coords.extend(acoords_mid)
+        axon_coords.extend(acoords_last)
 
     return axon_coords

From 3d5caa46c3e7bfc584c94db25d1b1bd3e276e9f0 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sun, 14 Jul 2024 13:36:56 +0800
Subject: [PATCH 011/187] =?UTF-8?q?=E2=9C=85=20add=20test=20cases=20for=20?=
 =?UTF-8?q?finding=20axons=20address=20with=208-bit=20input=20width?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/conftest.py           | 100 ++++++++++++++++++++++------
 tests/backend/test_segment_utils.py |   5 +-
 2 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index 7d1285ce..0f3adac2 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -981,17 +981,11 @@ def packbits1():
 
 
 def n_axon2lcn_ex_proto(n_axon, n_fanin_max) -> LCN_EX:
-    """Convert #N(of axons) to `LCN_EX` & check.
-
-    NOTE: LCN_EX = log2[ceil(#N/fan-in per dendrite)], where `LCN_1X` = 0.
-    """
     if n_axon < 1:
-        raise ValueError(f"the number of axons must be positive, but got {n_axon}.")
+        raise ValueError
 
     if (lcn := ((n_axon - 1) // n_fanin_max).bit_length()) > LCN_EX.LCN_64X:
-        raise ResourceError(
-            f"required LCN extension out of range {LCN_EX.LCN_64X} ({lcn}). "
-        )
+        raise ResourceError
 
     return LCN_EX(lcn)
 
@@ -1620,13 +1614,15 @@ class TestData:
     )
 
     aligned_coords_test_data = ParametrizedTestData(
-        args="neu_index, axon_seg, delay, n_timeslot, expected",
+        args="neu_index, axon_seg, delay, n_timeslot, is_iw8, expected",
         data=[
+            # iw1
             (
                 slice(5, 8),
                 AxonSegment(12, 3, 0),
                 1,
                 1 << 1,
+                False,
                 [
                     AxonCoord(1, 2),
                     AxonCoord(2, 0),
@@ -1638,17 +1634,15 @@ class TestData:
                 AxonSegment(12, 3, 0),
                 2,
                 1 << 1,
-                [
-                    AxonCoord(2 + 0, 0),
-                    AxonCoord(2 + 0, 1),
-                    AxonCoord(2 + 0, 2),
-                ],
+                False,
+                [AxonCoord(2 + 0, i) for i in range(3)],
             ),
             (
                 slice(1, 5),
                 AxonSegment(12, 3, 0),
                 2,
                 1 << 2,
+                False,
                 [
                     AxonCoord(4 + 0, 1),
                     AxonCoord(4 + 0, 2),
@@ -1661,6 +1655,7 @@ class TestData:
                 AxonSegment(12, 3, 0),
                 4,
                 1 << 3,
+                False,
                 [
                     AxonCoord(24 + 0, 1),
                     AxonCoord(24 + 0, 2),
@@ -1674,15 +1669,78 @@ class TestData:
                 AxonSegment(16, 4, 4),
                 4,
                 1 << 4,
+                False,
+                [AxonCoord(48 + 0, 4 + 3)]
+                + [AxonCoord(48 + 1, 4 + i) for i in range(4)]
+                + [AxonCoord(48 + 2, 4 + 0), AxonCoord(48 + 2, 4 + 1)],
+            ),
+            # iw8
+            (
+                slice(5, 8),
+                AxonSegment(12, 3, 0),
+                1,
+                1 << 1,
+                True,
                 [
-                    AxonCoord(48 + 0, 4 + 3),
-                    AxonCoord(48 + 1, 4 + 0),
-                    AxonCoord(48 + 1, 4 + 1),
-                    AxonCoord(48 + 1, 4 + 2),
-                    AxonCoord(48 + 1, 4 + 3),
-                    AxonCoord(48 + 2, 4 + 0),
-                    AxonCoord(48 + 2, 4 + 1),
+                    AxonCoord(1, 8 * 2),
+                    AxonCoord(2, 8 * 0),
+                    AxonCoord(2, 8 * 1),
                 ],
             ),
+            (
+                slice(0, 3),
+                AxonSegment(12, 3, 0),
+                2,
+                1 << 1,
+                True,
+                [AxonCoord(2 + 0, 8 * i) for i in range(3)],
+            ),
+            (
+                slice(1, 5),
+                AxonSegment(12, 3, 0),
+                2,
+                1 << 2,
+                True,
+                [
+                    AxonCoord(4 + 0, 8 * 1),
+                    AxonCoord(4 + 0, 8 * 2),
+                    AxonCoord(4 + 1, 8 * 0),
+                    AxonCoord(4 + 1, 8 * 1),
+                ],
+            ),
+            (
+                slice(1, 6),
+                AxonSegment(12, 3, 0),
+                4,
+                1 << 3,
+                True,
+                [
+                    AxonCoord(24 + 0, 8 * 1),
+                    AxonCoord(24 + 0, 8 * 2),
+                    AxonCoord(24 + 1, 8 * 0),
+                    AxonCoord(24 + 1, 8 * 1),
+                    AxonCoord(24 + 1, 8 * 2),
+                ],
+            ),
+            (
+                slice(5, 15),
+                AxonSegment(16, 8, 16),
+                1,
+                1 << 1,
+                True,
+                [AxonCoord(0, 8 * (16 + i)) for i in range(5, 8)]
+                + [AxonCoord(1, 8 * (16 + i)) for i in range(7)],
+            ),
+            (
+                slice(5, 35),
+                AxonSegment(40, 10, 10),
+                1,
+                1 << 2,
+                True,
+                [AxonCoord(0, 8 * (10 + i)) for i in range(5, 10)]
+                + [AxonCoord(1, 8 * (10 + i)) for i in range(10)]
+                + [AxonCoord(2, 8 * (10 + i)) for i in range(10)]
+                + [AxonCoord(3, 8 * (10 + i)) for i in range(5)],
+            ),
         ],
     )
diff --git a/tests/backend/test_segment_utils.py b/tests/backend/test_segment_utils.py
index 06981f57..2c6bed4d 100644
--- a/tests/backend/test_segment_utils.py
+++ b/tests/backend/test_segment_utils.py
@@ -119,6 +119,5 @@ def test_get_axon_segments_boundary(axons):
     TestData.aligned_coords_test_data["args"],
     TestData.aligned_coords_test_data["data"],
 )
-def test_aligned_coords(neu_index, axon_seg, delay, n_timeslot, expected):
-    axon_coords = aligned_coords(neu_index, axon_seg, delay, n_timeslot)
-    assert axon_coords == expected
+def test_aligned_coords(neu_index, axon_seg, delay, n_timeslot, is_iw8, expected):
+    assert aligned_coords(neu_index, axon_seg, delay, n_timeslot, is_iw8) == expected

From 4a0face423e9b826097468deefed3bc2aadcb0c1 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 15 Jul 2024 11:18:05 +0800
Subject: [PATCH 012/187] =?UTF-8?q?=F0=9F=8E=A8=20update=20formats=20&=20c?=
 =?UTF-8?q?omments?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_template.py     | 3 ++-
 paibox/backend/graphs.py            | 6 +++---
 paibox/backend/mapper.py            | 2 +-
 paibox/backend/routing.py           | 6 ++++++
 paibox/components/neuron/neurons.py | 4 ++--
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index 080f8fba..13ed9dca 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 import sys
 from collections import defaultdict
 from dataclasses import asdict, dataclass
@@ -355,7 +356,7 @@ def gen_config_frames_by_coreconf(
     write_to_file: bool,
     fp: Path,
     split_by_chip: bool,
-    formats: list[str],
+    formats: Sequence[str],
 ) -> dict[ChipCoord, list[FrameArrayType]]:
     """Generate configuration frames by given the `CorePlmConfig`."""
 
diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index e411f1a2..e41d7dbc 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -302,7 +302,7 @@ def _roundup_to_pow2(n: int) -> int:
         is_optimized = False
 
         if optim_nodes == ():
-            _optim_nodes = reversed(self.ordered_nodes)
+            _optim_nodes = list(reversed(self.ordered_nodes))
         else:
             _optim_nodes = optim_nodes
 
@@ -613,8 +613,8 @@ def convert2routing_groups(
                 else:
                     succ_cb_gid_dict[succ_cb._routing_id] = [succ_cb]
 
-            for succ_cb in succ_cb_gid_dict.values():
-                routing_groups.append(RoutingGroup(*succ_cb))
+            for v in succ_cb_gid_dict.values():
+                routing_groups.append(RoutingGroup(*v))
 
     routing_groups_succ: dict[RoutingGroup, list[RoutingGroup]] = defaultdict(list)
 
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index fe043164..45b1164a 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -455,7 +455,7 @@ def _member_cb_and_onode_config_export(self) -> OutputDestConf:
             "n4": {...} # as output node #2
         }
         """
-        output_dest_info = defaultdict(dict)
+        output_dest_info: OutputDestConf = defaultdict(dict)
         # Shallow copy
         ocoord = copy(_BACKEND_CONTEXT["output_core_addr_start"])
 
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 91165361..dc0a1507 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -527,9 +527,15 @@ def chip_coord(self) -> ChipCoord:
 
         return self[0].chip_coord
 
+    def __contains__(self, cb: CoreBlock) -> bool:
+        return cb in self.core_blocks
+
     def __getitem__(self, idx: int) -> CoreBlock:
         return self.core_blocks[idx]
 
+    def __iter__(self) -> Iterator[CoreBlock]:
+        return self.core_blocks.__iter__()
+
 
 @final
 class RoutingRoot:
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index b1e8b242..e92f01f6 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -28,7 +28,7 @@ def __init__(
         Args:
             - shape: shape of neurons.
             - threshold: when the membrane potential exceeds the threshold, neurons will fire.
-            - reset_v: If not specified, neurons will do soft reset after firing, v - threshold. If \
+            - reset_v: if not specified, neurons will do soft reset after firing, v - threshold. If \
                 specified, neurons will do hard reset after firing, v = reset_v.
             - neg_threshold: signed negative theshold. If not specified, it will be the smallest    \
                 negative integer allowed by the hardware.
@@ -94,7 +94,7 @@ def __init__(
             - leak_v: the signed leak voltage will be added directly to the membrane potential.
                 - If it is positive, the membrane potential will increase.
                 - If is is negative, the membrane potential will decrease.
-                - the final leak_v is leak_v + bias (default=0).
+                - The final leak_v is leak_v + bias (default=0).
             - bias: if a signed bias is given, it will be added to `leak_v`. The neuron will leak   \
                 before threshold comparison. `leak_v` will also be considered now.
             - neg_threshold: signed negative theshold. If not specified, it will be the smallest    \

From 62891af47f7545fc7d70a3e08fce32321098367e Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 15 Jul 2024 11:18:49 +0800
Subject: [PATCH 013/187] =?UTF-8?q?=F0=9F=94=A7=20allow=20`None`=20as=20`n?=
 =?UTF-8?q?eg=5Fthreshold`=20to=20use=20default=20minimum=20threshold?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py    |  5 ++++-
 paibox/components/neuron/neurons.py | 16 +++-------------
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 3fa0576f..07ca9aa3 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -407,7 +407,7 @@ def __init__(
         leak_comparison: LCM = LCM.LEAK_BEFORE_COMP,
         threshold_mask_bits: int = 0,
         neg_thres_mode: NTM = NTM.MODE_RESET,
-        neg_threshold: int = NEG_THRES_MIN,
+        neg_threshold: Optional[int] = None,
         pos_threshold: int = 1,
         leak_direction: LDM = LDM.MODE_FORWARD,
         leak_integration_mode: Union[L[0, 1], bool, LIM] = LIM.MODE_DETERMINISTIC,
@@ -426,6 +426,9 @@ def __init__(
         keep_shape: bool = True,
         name: Optional[str] = None,
     ) -> None:
+        if neg_threshold is None:
+            neg_threshold = NEG_THRES_MIN
+
         if neg_threshold > 0:
             # XXX *(-1) if passing a negative threshold > 0
             neg_threshold = (-1) * neg_threshold
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index e92f01f6..46b37251 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -6,7 +6,7 @@
 from paibox.types import DataArrayType, Shape
 
 from .base import Neuron
-from .utils import LEAK_V_MAX, NEG_THRES_MIN
+from .utils import LEAK_V_MAX
 
 __all__ = ["IF", "LIF", "TonicSpiking", "PhasicSpiking", "SpikingRelu"]
 
@@ -52,17 +52,12 @@ def __init__(
             _reset_v = 0
             _rm = RM.MODE_LINEAR
 
-        if isinstance(neg_threshold, int):
-            _neg_threshold = neg_threshold
-        else:
-            _neg_threshold = NEG_THRES_MIN
-
         super().__init__(
             shape,
             reset_mode=_rm,
             reset_v=_reset_v,
             neg_thres_mode=NTM.MODE_SATURATION,
-            neg_threshold=_neg_threshold,
+            neg_threshold=neg_threshold,
             pos_threshold=threshold,
             keep_shape=keep_shape,
             name=name,
@@ -121,17 +116,12 @@ def __init__(
         # Support passing in bias & leak_v at the same time
         _leak_v = leak_v + _bias
 
-        if isinstance(neg_threshold, int):
-            _neg_threshold = neg_threshold
-        else:
-            _neg_threshold = NEG_THRES_MIN
-
         super().__init__(
             shape,
             reset_mode=_rm,
             reset_v=_reset_v,
             neg_thres_mode=NTM.MODE_SATURATION,
-            neg_threshold=_neg_threshold,
+            neg_threshold=neg_threshold,
             pos_threshold=threshold,
             leak_v=_leak_v,
             keep_shape=keep_shape,

From 36ea6af2ff3bfeacbaaed3c363fd389ce2d0b360 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 15 Jul 2024 17:18:05 +0800
Subject: [PATCH 014/187] =?UTF-8?q?=F0=9F=8F=B7=EF=B8=8F=20add=20rt=5Fmode?=
 =?UTF-8?q?=20for=20ANN=20deployment?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/types.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 4cc35fa2..d2fa08cc 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -45,7 +45,12 @@
 SourceNodeType: TypeAlias = NodeType
 DestNodeType: TypeAlias = Neuron
 
-WeightRamType: TypeAlias = NDArray[np.uint64]  # uint64 weights mapped in weight RAM
+WRAM_UNPACKED_DTYPE = np.uint8
+WRAM_PACKED_DTYPE = np.uint64
+# Type of unpacked weight in WRAM
+WRAMUnpackedType: TypeAlias = NDArray[WRAM_UNPACKED_DTYPE]
+# Type of packed weight in WRAM
+WRAMPackedType: TypeAlias = NDArray[WRAM_PACKED_DTYPE]
 _COORD_UNSET = 0
 _DEGREE_UNSET = -1
 
@@ -90,6 +95,7 @@ class EdgeAttr(NamedTuple):
 class PartitionedEdges(NamedTuple):
     edges: set[EdgeType]
     rg_id: int
+    rt_mode: CoreMode = CoreMode.MODE_SNN  # XXX Temp solution
 
 
 NeuSlice: TypeAlias = slice
@@ -156,7 +162,7 @@ class AxonSegment(NamedTuple):
 class CoreAbstract(PAIBoxObject, ABC):
     """Abstract core class."""
 
-    runtime_mode: CoreMode
+    rt_mode: CoreMode
 
     @property
     @abstractmethod
@@ -166,4 +172,15 @@ def n_core_required(self) -> int:
 
     @classmethod
     @abstractmethod
-    def build(cls): ...
+    def build(cls, *args, **kwargs): ...
+
+
+if hasattr(CoreMode, "is_iw8"):
+
+    def is_iw8(mode: CoreMode) -> bool:
+        return mode.is_iw8  # type: ignore
+
+else:
+
+    def is_iw8(mode: CoreMode) -> bool:
+        return mode is CoreMode.MODE_ANN_TO_BANN_OR_SNN or mode is CoreMode.MODE_ANN

From bb666bf1092b117458bc4ddfd31a30644764458a Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 16 Jul 2024 23:38:37 +0800
Subject: [PATCH 015/187] =?UTF-8?q?=F0=9F=92=A5=20support=20ANN=20mode=20o?=
 =?UTF-8?q?f=20the=20backend?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py    |  21 +-
 paibox/backend/mapper.py    |   7 +-
 paibox/backend/placement.py | 395 ++++++++++++++++++++----------------
 3 files changed, 241 insertions(+), 182 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index e41d7dbc..d6db0e1b 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -14,7 +14,7 @@
 
 from .constrs import GraphNodeConstrs
 from .context import _BACKEND_CONTEXT
-from .placement import CoreBlock, neuron_repl_prop
+from .placement import CoreBlock
 from .routing import RoutingGroup
 from .segment_utils import get_neu_segments
 from .types import *
@@ -215,6 +215,8 @@ def graph_partition(self) -> list[PartitionedEdges]:
             and the edges connected to these partitioned nodes will be returned as a set.
 
         Return: a list of partitioned edges & a list of routing groups id.
+
+        TODO constraints in partitioning: iw, sw, snn_en, tws, twe, pool_max_en.
         """
         self.build_check()
 
@@ -264,6 +266,10 @@ def graph_partition(self) -> list[PartitionedEdges]:
                 succ_nodes_set.update(self._raw_nodes[n] for n in self.succ_dg[_node])
 
             succ_nodes_lst: list[NodeType] = list(succ_nodes_set)
+            mode = succ_nodes_lst[0].mode
+            if any(mode != node.mode for node in succ_nodes_lst):
+                raise NotSupportedError("mixed mode is not supported.")
+
             idx_of_sg = GraphNodeConstrs.tick_wait_attr_constr(succ_nodes_lst)
 
             if len(idx_of_sg) > 0:
@@ -274,10 +280,9 @@ def graph_partition(self) -> list[PartitionedEdges]:
                             e.edge
                             for e in self.pred_dg[succ_nodes_lst[i].name].values()
                         )
-                    gh_parts.append(PartitionedEdges(succ_edges_sg, rgid))
-
+                    gh_parts.append(PartitionedEdges(succ_edges_sg, rgid, rt_mode=mode))
             else:
-                gh_parts.append(PartitionedEdges(succ_edges_set, rgid))
+                gh_parts.append(PartitionedEdges(succ_edges_set, rgid, rt_mode=mode))
 
             rgid += 1
 
@@ -337,8 +342,8 @@ def _roundup_to_pow2(n: int) -> int:
             n_core_required_after_copy = len(
                 get_neu_segments(
                     pred_cb_dest,
-                    pred_cb.neuron_capacity,
-                    neuron_repl_prop(pred_cb.n_weight_bits, pred_cb.n_timeslot),
+                    pred_cb.n_fanout,
+                    pred_cb.n_neuron_repl,
                     _BACKEND_CONTEXT.cflags["grouping_optim_target"],
                 )
             )
@@ -353,8 +358,8 @@ def _roundup_to_pow2(n: int) -> int:
                 n_core_after_split[i] = len(
                     get_neu_segments(
                         dest,  # type: ignore
-                        succ_cb.neuron_capacity,
-                        neuron_repl_prop(succ_cb.n_weight_bits, succ_cb.n_timeslot),
+                        succ_cb.n_fanout,
+                        succ_cb.n_neuron_repl,
                         _BACKEND_CONTEXT.cflags["grouping_optim_target"],
                     )
                 )
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 45b1164a..4f424b68 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -36,7 +36,7 @@
 )
 from .placement import CoreBlock, aligned_coords, max_lcn_of_cb
 from .routing import RoutingGroup, RoutingRoot
-from .types import NeuSegment, NodeDegree, NodeType, SourceNodeType
+from .types import NeuSegment, NodeDegree, NodeType, SourceNodeType, is_iw8
 
 __all__ = ["Mapper"]
 
@@ -217,7 +217,9 @@ def build_core_blocks(self) -> None:
 
         for part in partitioned_edges:
             self.core_blocks.append(
-                CoreBlock.build(*part.edges, seed=0, routing_id=part.rg_id)
+                CoreBlock.build(
+                    *part.edges, routing_id=part.rg_id, rt_mode=part.rt_mode
+                )
             )
 
         for cur_cb in self.core_blocks:
@@ -411,6 +413,7 @@ def _inpproj_config_export(self) -> InputNodeConf:
                 input_cb.axon_segments[inode],
                 1,
                 input_cb.n_timeslot,
+                is_iw8(input_cb.rt_mode),
             )
 
             inp_neuron_dest = InputNeuronDest(
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 809b3eb8..f5893f88 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -7,9 +7,14 @@
 from paicorelib import WeightPrecision as WP
 
 from paibox.components import FullConnectedSyn, Neuron
-from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning
+from paibox.exceptions import (
+    GraphBuildError,
+    NotSupportedError,
+    ResourceError,
+    TruncationWarning,
+)
 from paibox.types import WeightType, WEIGHT_DTYPE
-from paibox.utils import check_attr_same, count_unique_elem
+from paibox.utils import check_attr_same
 
 from .conf_template import (
     CoreConfig,
@@ -22,6 +27,8 @@
 from .segment_utils import aligned_coords, get_axon_segments, get_neu_segments
 from .types import (
     _COORD_UNSET,
+    WRAM_PACKED_DTYPE,
+    WRAM_UNPACKED_DTYPE,
     AxonCoord,
     AxonSegment,
     CoreAbstract,
@@ -30,11 +37,36 @@
     NeuSegOfCoreBlock,
     NeuSegOfCorePlm,
     SourceNodeType,
-    WeightRamType,
+    WRAMPackedType,
+    WRAMUnpackedType,
+    is_iw8,
 )
 
 
 class CoreBlock(CoreAbstract):
+
+    _parents: tuple[FullConnectedSyn, ...]
+    _routing_id: int
+    seed: int
+    """Random seed, legal integer, no more than uint64."""
+    _lcn_ex: LCN_EX
+    _lcn_locked: bool
+    """Indicate whether `lcn_ex` has been adjusted & locked."""
+    target_lcn: LCN_EX
+    """The target(destination core block) LCN."""
+    chip_coord: ChipCoord
+    """A core block must be placed on a chip."""
+    core_coords: list[Coord]
+    """Assigned core coordinates."""
+    core_placements: dict[Coord, "CorePlacement"]
+    """Core placements."""
+    axon_segments: dict[SourceNodeType, AxonSegment] = dict()
+    """A dictionary of segments of each axon(source node)."""
+    neuron_segs_of_cb: NeuSegOfCoreBlock = []
+    """Neuron segments in the core block. Each element in the list represents the neuron    \
+        segments in core placement.
+    """
+
     def __init__(
         self,
         *parents: FullConnectedSyn,
@@ -54,38 +86,18 @@ def __init__(
         """
         super().__init__(name)
         self._parents = parents
-        self._wp = WP.WEIGHT_WIDTH_8BIT  # default value
         self._routing_id = routing_id
-        self.runtime_mode = mode
-
-        self._lcn_ex = self._n_axon2lcn_ex()
-
+        self.rt_mode = mode
         self.seed = seed
-        """Random seed, legal integer, no more than uint64."""
+        self._lcn_ex = self._n_axon2lcn_ex()
 
         self.target_lcn = LCN_EX.LCN_1X
-        """The target(destination core block) LCN."""
-
         self._lcn_locked = False
-        """Used to indicate whether `lcn_ex` has been adjusted."""
-
-        self.core_coords: list[Coord] = list()
-        """Assigned core coordinates."""
-
-        self.chip_coord: ChipCoord = Coord(_COORD_UNSET, _COORD_UNSET)
-        """A core block must be placed on a chip."""
-
-        self.core_placements: dict[Coord, CorePlacement] = dict()
-        """Core placements."""
-
-        # Segment the group of axons.
-        self.axon_segments: dict[SourceNodeType, AxonSegment] = dict()
-        """A dictionary of segments of each axon(source node)."""
-
-        self.neuron_segs_of_cb: NeuSegOfCoreBlock = []
-        """Neuron segments in the core block. Each element in the list \
-            represents the neuron segments in core placement(physical core).
-        """
+        self.core_coords = []
+        self.chip_coord = Coord(_COORD_UNSET, _COORD_UNSET)
+        self.core_placements = dict()
+        self.axon_segments = dict()
+        self.neuron_segs_of_cb = []
 
     def group_neurons(
         self, optim_target: Literal["latency", "core", "both"] = "both"
@@ -95,10 +107,7 @@ def group_neurons(
             raise GraphBuildError("group the neurons after 'lcn_ex' is locked.")
 
         self.neuron_segs_of_cb = get_neu_segments(
-            self.dest,
-            self.neuron_capacity,
-            neuron_repl_prop(self.n_weight_bits, self.n_timeslot),
-            optim_target,
+            self.dest, self.n_fanout, self.n_neuron_repl, optim_target
         )
 
     def core_plm_alloc(self) -> None:
@@ -130,13 +139,13 @@ def _n_axon2lcn_ex(self) -> LCN_EX:
             )
 
         if (
-            lcn := int((self.n_axon - 1) // self.n_fanin_max).bit_length()
+            lcn := ((self.n_axon - 1) // self.n_fanin_base).bit_length()
         ) > LCN_EX.LCN_64X:
-            _max_n_axons = self.n_fanin_max * (1 << LCN_EX.LCN_64X)
+            _max_n_axons = self.n_fanin_base << LCN_EX.LCN_64X
             raise ResourceError(
-                f"required LCN extension out of range {LCN_EX.LCN_64X} ({lcn}). "
-                f"The number of axons must be <= {_max_n_axons}. "
-                f"But synapses {self._obj_repr()} have a total of {self.n_axon} axons."
+                f"required LCN out of range {LCN_EX.LCN_64X} ({lcn}). The number of axons "
+                f"must be <= {_max_n_axons}, but synapses {self._obj_repr} have a total of "
+                f"{self.n_axon} axons."
             )
 
         return LCN_EX(lcn)
@@ -144,15 +153,13 @@ def _n_axon2lcn_ex(self) -> LCN_EX:
     def copy(self):
         raise NotImplementedError
 
-    """Interfaces"""
-
     @property
     def obj(self) -> tuple[FullConnectedSyn, ...]:
         return self._parents
 
     @property
     def shape(self) -> tuple[int, int]:
-        return (count_unique_elem(self.source), count_unique_elem(self.dest))
+        return (len(self.source), len(self.dest))
 
     @property
     def source(self) -> list[SourceNodeType]:
@@ -175,20 +182,12 @@ def n_axon_of(self, index: int) -> int:
     """Boundary limitations"""
 
     @property
-    def neuron_capacity(self) -> int:
-        """Neuron capacity. #N of valid dendrites/#N of dendrites required per neuron.
-
-        FIXME This method ONLY works in SNN runtime_mode. For ANN runtime_mode, use table lookup?
-        """
-        return (self.n_dendrite_max >> self.lcn_ex) // self.n_dendrite_per_neuron
-
-    @property
-    def n_fanin_max(self) -> int:
-        """Maximum #N of fan-in per dendrite."""
+    def n_fanin_base(self) -> int:
+        """The fan-in of cores."""
         return (
-            HwConfig.N_FANIN_PER_DENDRITE_ANN
-            if self.runtime_mode is CoreMode.MODE_ANN
-            else HwConfig.N_FANIN_PER_DENDRITE_SNN
+            HwConfig.N_FANIN_PER_DENDRITE_SNN
+            if self.rt_mode.is_snn
+            else HwConfig.N_FANIN_PER_DENDRITE_ANN
         )
 
     @property
@@ -200,18 +199,10 @@ def weight_precision(self) -> WP:
         # Optimized in `s.weight_precision`.
         return max(s.weight_precision for s in self.obj)
 
-    @property
-    def n_dendrite_per_neuron(self) -> int:
-        """Multiple dendrites will be combined to achieve higher precision weights.
-
-        FIXME The limit on the number of dendrites in SNN/ANN modes is different, which affects \
-            the capacity of neurons in physical core.
-        """
-        return 1 << self.weight_precision
-
     @property
     def n_weight_bits(self) -> int:
-        return self.n_dendrite_per_neuron
+        """Multiple dendrites will be combined to achieve higher precision weights."""
+        return 1 << self.weight_precision
 
     @property
     def lcn_ex(self) -> LCN_EX:
@@ -219,10 +210,9 @@ def lcn_ex(self) -> LCN_EX:
 
     @lcn_ex.setter
     def lcn_ex(self, lcn_ex: LCN_EX) -> None:
-        """Set or adjust the `lcn_ex` & lock."""
         if lcn_ex > LCN_EX.LCN_64X:
             raise ResourceError(
-                f"required LCN extension out of range {LCN_EX.LCN_64X} ({lcn_ex})."
+                f"required LCN out of range {LCN_EX.LCN_64X} ({lcn_ex})."
             )
 
         self._lcn_ex = lcn_ex
@@ -232,12 +222,17 @@ def lcn_ex(self, lcn_ex: LCN_EX) -> None:
     def n_timeslot(self) -> int:
         return 1 << self.lcn_ex
 
+    @property
+    def dendrite_comb_rate(self) -> int:
+        """Log2 of the #N of dendrites to be combined (LCN + weight precision)."""
+        return self.lcn_ex + self.weight_precision
+
     @property
     def tws(self) -> int:
         """Attribute `tick_wait_start`."""
         if not check_attr_same(self.dest, "tick_wait_start"):
             raise AttributeError(
-                "Attribute 'tick_wait_start' of the core block are not equal."
+                "attribute 'tick_wait_start' of the core block are not equal."
             )
 
         return self.dest[0].tick_wait_start
@@ -247,7 +242,7 @@ def twe(self) -> int:
         """Attribute `tick_wait_end.`"""
         if not check_attr_same(self.dest, "tick_wait_end"):
             raise AttributeError(
-                "Attribute 'tick_wait_end' of the core block are not equal."
+                "attribute 'tick_wait_end' of the core block are not equal."
             )
 
         return self.dest[0].tick_wait_end
@@ -257,11 +252,12 @@ def n_axon(self) -> int:
         return sum(s.num_out for s in self.axons)
 
     @property
-    def n_dendrite_max(self) -> int:
+    def n_fanout(self) -> int:
+        """The fan-out of cores."""
         return (
-            HwConfig.N_DENDRITE_MAX_ANN
-            if self.runtime_mode is CoreMode.MODE_ANN
-            else HwConfig.N_DENDRITE_MAX_SNN
+            HwConfig.N_DENDRITE_MAX_SNN >> self.dendrite_comb_rate
+            if self.rt_mode.is_snn
+            else FANOUT_IW8[self.dendrite_comb_rate]
         )
 
     @property
@@ -274,10 +270,7 @@ def unrolling_factor(self) -> list[int]:
 
     @property
     def n_neuron_of_plm(self) -> list[int]:
-        """A list of the #N of neurons on each `CorePlacement`.
-
-        FIXME Different in SNN/ANN runtime_mode.
-        """
+        """A list of the #N of neurons on each `CorePlacement`."""
         if len(self.core_coords) == 0:
             raise GraphBuildError("do this after coordinates assignment.")
 
@@ -295,7 +288,7 @@ def group_axons(self) -> None:
             raise GraphBuildError("get axon segments after 'lcn_ex' is locked.")
 
         self.axon_segments = get_axon_segments(
-            self.axons, self.n_timeslot, self.n_fanin_max
+            self.axons, self.n_timeslot, self.n_fanin_base
         )
 
     @cached_property
@@ -340,6 +333,20 @@ def get_raw_weight_of_coord(self, idx: int) -> list[WeightType]:
 
         return w_of_neu_segs
 
+    @property
+    def n_neuron_repl(self) -> int:
+        """The number of times each neuron needs to be replicated in NRAM.
+
+        For example, in SNN mode, N[0:3] with LCN_2X & WP8:
+            NRAM [0]  [1]  ... [15] [16] [17] ... [31] ...
+                 N[0] N[0] ... N[0] N[1] N[1] ... N[1] ...
+
+        But at 8-bit input width, neurons don't need to be replicated.
+            NRAM [0]  [1]  ... [15]  [16]  ...
+                 N[0] N[1] ... N[15] N[16] ...
+        """
+        return 1 << self.dendrite_comb_rate if self.rt_mode.is_snn else 1
+
     def __len__(self) -> int:
         return self.n_core_required
 
@@ -349,21 +356,27 @@ def __repr__(self) -> str:
     def __str__(self) -> str:
         return f"<{self.name} of target '{self.obj}'>"
 
+    @property
     def _obj_repr(self) -> str:
         """The representation of the names of target objects."""
         return ", ".join(n.name for n in self.obj)
 
     @classmethod
-    def build(cls, *synapses: FullConnectedSyn, routing_id: int, seed: int = 0):
+    def build(
+        cls,
+        *synapses: FullConnectedSyn,
+        routing_id: int,
+        rt_mode: CoreMode,
+        seed: int = 0,
+    ):
         """Group synapses & build `CoreBlock`."""
-        # FIXME where does the parameter check do?
         if seed > (1 << 64) - 1:
             warnings.warn(
                 f"random seed {seed} is too large, truncated into 64 bits.",
                 TruncationWarning,
             )
 
-        return cls(*synapses, routing_id=routing_id, seed=seed)
+        return cls(*synapses, routing_id=routing_id, mode=rt_mode, seed=seed)
 
     @classmethod
     def export_core_plm_config(cls, cb: "CoreBlock") -> CoreConfInChip:
@@ -377,13 +390,17 @@ def export_core_plm_config(cls, cb: "CoreBlock") -> CoreConfInChip:
 
 
 class CorePlacement(CoreAbstract):
-    """The divided synapse placed on a single CORE."""
-
-    WEIGHT_RAM_SHAPE: ClassVar[tuple[int, int]] = (
-        HwConfig.N_FANIN_PER_DENDRITE_SNN,
-        HwConfig.N_DENDRITE_MAX_SNN,
-    )
-    """SNN mode ONLY."""
+    parent: CoreBlock
+    coord: Coord
+    """Routing coordinate"""
+    n_neuron: int
+    raw_weights: list[WeightType]
+    """The folded weights."""
+    neu_segs_of_cplm: NeuSegOfCorePlm
+    neu_configs: dict[Neuron, NeuronConfig]
+
+    # FIXME Change to HwConfig.ADDR_AXON_MAX(1152) once it is fixed.
+    WRAM_BASE_SHAPE: ClassVar[tuple[int, int]] = (1152, HwConfig.ADDR_RAM_MAX)
 
     def __init__(
         self,
@@ -403,18 +420,13 @@ def __init__(
             - neu_segs_of_cplm: The segment of the neurons in the physical core.
         """
         super().__init__(name)
-
         self.parent = parent
+        self.rt_mode = parent.rt_mode
         self.coord = routing_coord
-        """Routing coordinate"""
-
         self.n_neuron = n_neuron
-
-        self._weights_folded = self._fold_raw_weights(raw_weights)
-        """The folded weights."""
-
+        self.raw_weights = raw_weights
         self.neu_segs_of_cplm = neu_segs_of_cplm
-        self.neu_configs: dict[Neuron, NeuronConfig] = dict()
+        self.neu_configs = dict()
 
     @classmethod
     def build(cls, parent: CoreBlock, idx: int):
@@ -429,12 +441,15 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
         """Fold the weights into LCN-sized blocks."""
         w_folded_list = []
         w_folded_of_axon_segs = []
-        n_fold = self.n_timeslot
+        # See the note of function `_weight_ram_mapping` below.
+        n_fold = (
+            self.n_timeslot
+            if self.rt_mode.is_snn
+            else 1 << (self.dendrite_comb_rate - 3)
+        )
 
         if self.lcn_ex == LCN_EX.LCN_1X:
-            w_folded = np.hstack(raw_weights)
-            w_folded.setflags(write=False)
-            return w_folded
+            return np.hstack(raw_weights)
 
         # LCN_EX > LCN_1X
         for raw_weight in raw_weights:
@@ -459,24 +474,46 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
             w_folded = np.vstack(w_folded_of_axon_segs)
             w_folded_list.append(w_folded)
 
-        w_folded = np.hstack(w_folded_list)
-        w_folded.setflags(write=False)
-        return w_folded
+        return np.hstack(w_folded_list)
+
+    def _weight_ram_mapping(self) -> WRAMPackedType:
+        """Map the raw weights to the weight RAM(WRAM). The mapping is different for both input widths.
+        
+        NOTE: When the input width is 8 bits, no neurons need to be mapped to the WRAM when the combination rate of \
+            dendrites >= 8, while some neurons need to be mapped to the WRAM when < 8.
+            
+            When the input width is 8 bits and with the combination rate of dendrites > 3, the mapping of weights   \
+            becomes the key to limiting neuron capacity. In this case, if the weight accuracy is less than 8 bits   \
+            (which may also occur when the weight accuracy is optimized), the weight cannot be folded directly in   \
+            the fan-in expansion direction, otherwise the column of the WRAM will exceed the upper limit(512).      \
+            
+            A portion of the fan-in needs to be expanded to an unfilled portion in the direction of the weight      \
+            accuracy. At this point, n_fold=n_timeslot/(8/n_weight_bits)=2^(dendrite_comb_rate - 3). For example,   \
+            for LCN_8X & WP8, the n_fold is 3. For LCN_32X & WP4, the n_fold is 4 (instead of 5).
+            
+        TODO Now, in ANN mode, only the mapping of 8-bit weights is supported. The weight accuracy optimization is  \
+            supposed to disable manually for now.
+        """
+        if not self.rt_mode.is_snn and self.weight_precision < WP.WEIGHT_WIDTH_8BIT:
+            raise NotSupportedError("only support 8-bit weights in ANN mode.")
 
-    def _weight_ram_mapping(self) -> WeightRamType:
-        row, col = self._weights_folded.shape
-        w_unpacked = np.zeros(self.WEIGHT_RAM_SHAPE, dtype=np.uint8)
+        _weights_folded = self._fold_raw_weights(self.raw_weights)
+        row, col = _weights_folded.shape
+        # The 1152*512 unpacked weight
+        w_unpacked = np.zeros(self.WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
 
         if self.n_weight_bits == 1:
-            w_unpacked[:row, :col] = self._weights_folded
+            w_unpacked[:row, :col] = _weights_folded
         else:
-            # (N, M) -> (M*N, 1)
-            w_folded_3d = np.expand_dims(self._weights_folded.T, axis=2).astype(
-                np.uint8
+            # (N, M)(int8) -> (M, N, 1)(uint8)
+            w_folded_3d = np.expand_dims(_weights_folded.T, axis=2).astype(
+                WRAM_UNPACKED_DTYPE
             )
 
+            _n_group_bit = HwConfig.N_FANIN_PER_DENDRITE_ANN
+
             for i in range(col):
-                # For every column, unpack the array [N*1] -> [N*n_weight_bits]
+                # For every column, unpack the array (N, 1) -> (N, n_weight_bits)
                 unpacked = np.unpackbits(
                     w_folded_3d[i],
                     axis=1,
@@ -484,28 +521,18 @@ def _weight_ram_mapping(self) -> WeightRamType:
                     bitorder=HwConfig.WEIGHT_BITORDER,
                 )
 
-                w_unpacked[
-                    :row, self.n_weight_bits * i : self.n_weight_bits * (i + 1)
-                ] = unpacked
-
-        assert np.max(w_unpacked, axis=None) <= np.uint8(1)
-        assert np.min(w_unpacked, axis=None) >= np.uint8(0)
-
-        # Convert the unpacked weights into a mapping format,
-        # corresponding to the RAM address, each address contains 18 uint64.
-        # (1152, 512) -> (512, 1152) -> (512*18, 64)(uint8).
-        # Reshape to 64 columns to avoid contiguous problem.
-        w_unpacked_T_rehaped = w_unpacked.T.reshape(-1, 64)
-
-        # (512*18, 64)(uint8) -> (512*18, 8)(uint8)
-        w_packed_u8 = np.packbits(
-            w_unpacked_T_rehaped, axis=1, bitorder=HwConfig.WEIGHT_BITORDER
-        )
-        # (512*18, 8)(uint8) -> (512*18, 1)(uint64) -> (512, 18)(uint64)
-        w_packed_u64 = w_packed_u8.view(np.uint64).reshape(-1, 18)
-        w_packed_u64.setflags(write=False)
+                if self.rt_mode.is_snn:
+                    w_unpacked[
+                        :row, self.n_weight_bits * i : self.n_weight_bits * (i + 1)
+                    ] = unpacked
+                else:
+                    # In the case of 8-bit input width, the weights are mapped differently
+                    for bit in range(self.n_weight_bits):
+                        w_unpacked[bit * _n_group_bit : bit * _n_group_bit + row, i] = (
+                            unpacked[:, bit]
+                        )
 
-        return w_packed_u64
+        return self._weight_pack(w_unpacked)
 
     @staticmethod
     def _nfold_weight(
@@ -522,23 +549,15 @@ def _nfold_weight(
 
         if raw_row % n_fold > 0:
             n_row_padding = n_fold - raw_row % n_fold
-
-            # Check #1
-            # assert expected_row * n_fold == raw_row + n_row_padding
-
             _raw_weight = np.append(
                 raw_weight,
                 np.zeros((n_row_padding, raw_col), dtype=WEIGHT_DTYPE),
                 axis=0,
             )
         else:
-            _raw_weight = raw_weight.copy()
+            _raw_weight = raw_weight
 
         w_splited = np.vsplit(_raw_weight, n_fold)
-
-        # Check #2
-        # assert _raw_weight.shape[0] == expected_row * n_fold
-
         w_folded = np.zeros((expected_row, raw_col * n_fold), dtype=WEIGHT_DTYPE)
 
         for i, j in np.ndindex((n_fold, raw_col)):
@@ -547,8 +566,30 @@ def _nfold_weight(
 
         return w_folded
 
+    @staticmethod
+    def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
+        """Convert the unpacked weights into a mapping format, corresponding to the WRAM address, each address      \
+            contains 18 uint64.
+            (1152, 512) -> T -> (512*18, 64) -> (512*18, 8) uint8 -> (512*18, 1) uint64 -> (512, 18) uint64.
+        """
+        _n_bit_packed = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bit of packed dtype
+        # #N of u64 on each WRAM address
+        _n_u64_naddr = CorePlacement.WRAM_BASE_SHAPE[0] // _n_bit_packed
+
+        # Reshape to 64 columns to avoid contiguous problem.
+        w_unpacked_aligned = w_unpacked.T.reshape(-1, _n_bit_packed)
+        # (512*18, 64) uint8 -> (512*18, 8) uint8
+        w_packed_u8 = np.packbits(
+            w_unpacked_aligned, axis=1, bitorder=HwConfig.WEIGHT_BITORDER
+        )
+        # (512*18, 8) uint8 -> (512*18, 1) uint64 -> (512, 18) uint64
+        w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape(-1, _n_u64_naddr)
+        w_packed_u64.setflags(write=False)
+
+        return w_packed_u64
+
     def export_param_config(self) -> CoreConfig:
-        _mode_params = self.mode.conf
+        _mode_params = self.rt_mode.conf
 
         # fmt: off
         cb_config = CoreConfig(
@@ -557,7 +598,7 @@ def export_param_config(self) -> CoreConfig:
             self.lcn_ex,                        # lcn_extension
             _mode_params[0],                    # input_width_format
             _mode_params[1],                    # spike_width_format
-            self.n_dendrite,                    # num_dendrite
+            self.n_working_dendrite,            # num_dendrite
             MaxPoolingEnable.DISABLE,           # max_pooling_en
             self.tws,                           # tick_wait_start
             self.twe,                           # tick_wait_end
@@ -596,6 +637,7 @@ def export_neu_config(
                 axon_dests[0].axon_segments[neu_seg.target],
                 neu_seg.target.delay_relative,
                 axon_dests[0].n_timeslot,
+                is_iw8(axon_dests[0].rt_mode),
             )
 
             # Get all core coordinates and replication ids.
@@ -610,6 +652,7 @@ def export_neu_config(
             )
 
             self.neu_configs[neu_seg.target] = config
+            return None
         else:
             # neu_seg is a part of an output node
             assert isinstance(output_core_coord, Coord)
@@ -639,14 +682,10 @@ def export_core_plm_config(self) -> CorePlmConfig:
             self.parent.seed, self.weight_ram, core_param, self.neu_configs
         )
 
-    @property
-    def mode(self) -> CoreMode:
-        return self.parent.runtime_mode
-
     @property
     def shape(self) -> tuple[int, int]:
-        return (count_unique_elem(self.source), count_unique_elem(self.dest))
-
+        return (len(self.source), len(self.dest))
+ 
     @property
     def weight_precision(self) -> WP:
         return self.parent.weight_precision
@@ -671,6 +710,10 @@ def lcn_ex(self) -> LCN_EX:
     def target_lcn(self) -> LCN_EX:
         return self.parent.target_lcn
 
+    @property
+    def dendrite_comb_rate(self) -> int:
+        return self.parent.dendrite_comb_rate
+
     @property
     def tws(self) -> int:
         return self.parent.tws
@@ -680,15 +723,20 @@ def twe(self) -> int:
         return self.parent.twe
 
     @property
-    def n_dendrite(self) -> int:
-        return self.n_neuron * neuron_repl_prop(self.n_weight_bits, self.n_timeslot)
+    def n_working_dendrite(self) -> int:
+        """The number of actual working dendrites. In ANN mode, the number of working   \
+            dendrites N <= 4096. In SNN mode, N <= 512.
+
+        NOTE: n_neuron * (2^comb_rate) = n_neuron << comb_rate
+        """
+        return self.n_neuron << self.dendrite_comb_rate
 
     @property
     def source(self) -> list[SourceNodeType]:
         return self.parent.source
 
     @property
-    def dest(self):
+    def dest(self) -> list[DestNodeType]:
         """The destination nodes within it.
 
         NOTE: This attribute is different from the one of its parent.
@@ -696,7 +744,7 @@ def dest(self):
         return [p.target for p in self.neu_segs_of_cplm]
 
     @property
-    def weight_ram(self) -> WeightRamType:
+    def weight_ram(self) -> WRAMPackedType:
         return self._weight_ram_mapping()
 
     @property
@@ -710,13 +758,6 @@ def __len__(self) -> int:
 class EmptyCorePlacement(CoreAbstract):
     """Empty core placement."""
 
-    _default_wp: ClassVar[WP] = WP.WEIGHT_WIDTH_1BIT
-    _default_lcn_ex: ClassVar[LCN_EX] = LCN_EX.LCN_1X
-    _default_n_dendrite: ClassVar[int] = 0
-    _default_tws: ClassVar[int] = 0
-    _default_twe: ClassVar[int] = 0
-    _default_target_lcn: ClassVar[LCN_EX] = LCN_EX.LCN_1X
-
     def __init__(self, coord: Coord, name: Optional[str] = None) -> None:
         super().__init__(name)
         self.coord = coord
@@ -727,16 +768,16 @@ def export_param_config(self) -> CoreConfig:
         # fmt: off
         cb_config = CoreConfig(
             self.name,                          # name of the core
-            self._default_wp,                   # weight_precision
-            self._default_lcn_ex,               # lcn_extension
+            WP.WEIGHT_WIDTH_1BIT,               # weight_precision
+            LCN_EX.LCN_1X,                      # lcn_extension
             _mode_params[0],                    # input_width_format
             _mode_params[1],                    # spike_width_format
-            self._default_n_dendrite,           # num_dendrite
+            0,                                  # num_dendrite
             MaxPoolingEnable.DISABLE,           # max_pooling_en
-            self._default_tws,                  # tick_wait_start
-            self._default_twe,                  # tick_wait_end
+            0,                                  # tick_wait_start
+            0,                                  # tick_wait_end
             _mode_params[2],                    # snn_mode_en
-            self._default_target_lcn,           # target_lcn
+            LCN_EX.LCN_1X,                      # target_lcn
             _BACKEND_CONTEXT.test_chip_addr,    # test_chip_addr
         )
         # fmt: on
@@ -760,9 +801,19 @@ def max_lcn_of_cb(cb: list[CoreBlock]) -> LCN_EX:
     return max(cb, key=lambda cb: cb.lcn_ex).lcn_ex
 
 
-def neuron_repl_prop(nbits: int, ntimeslot: int) -> int:
-    """Get the proportion of neuron replication.
-
-    scale = nbits(1 << wp) * n_timeslot(1 << lcn_ex)
-    """
-    return nbits * ntimeslot
+if hasattr(HwConfig, "FANOUT_IW8"):
+    FANOUT_IW8 = HwConfig.FANOUT_IW8  # type: ignore
+else:
+    # Get the fan-out by the combination rate of dendrites
+    FANOUT_IW8: list[int] = [
+        HwConfig.N_NEURON_MAX_ANN,
+        1364,
+        876,
+        512,
+        256,
+        128,
+        64,
+        32,
+        16,
+        8,
+    ]

From e1631aaf37c939f075bb837da5c772d26665d4ad Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 17 Jul 2024 19:46:58 +0800
Subject: [PATCH 016/187] =?UTF-8?q?=F0=9F=90=9B=20update=20`=5F=5Fdeepcopy?=
 =?UTF-8?q?=5F=5F`=20methods=20to=20accept=20`memo`=20parameter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/types.py            | 2 +-
 paibox/components/neuron/base.py   | 2 +-
 paibox/components/synapses/base.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index d2fa08cc..3eaf594e 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -74,7 +74,7 @@ class NodeDegree:
     def __copy__(self) -> "NodeDegree":
         return self.__deepcopy__()
 
-    def __deepcopy__(self) -> "NodeDegree":
+    def __deepcopy__(self, memo=None) -> "NodeDegree":
         return NodeDegree(self.in_degree, self.out_degree)
 
     def copy(self) -> "NodeDegree":
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 07ca9aa3..67f87509 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -514,7 +514,7 @@ def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
         return self.__deepcopy__()
 
-    def __deepcopy__(self) -> "Neuron":
+    def __deepcopy__(self, memo=None) -> "Neuron":
         """Deepcopy a neuron.
 
         NOTE: It simply reinitializes a neuron with the parameters of the original neuron.
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 4163cdca..fe3f75a4 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -89,7 +89,7 @@ def reset_state(self, *args, **kwargs) -> None:
     def __copy__(self) -> "FullConnSyn":
         return self.__deepcopy__()
 
-    def __deepcopy__(self) -> "FullConnSyn":
+    def __deepcopy__(self, memo=None) -> "FullConnSyn":
         self._n_copied += 1
 
         return FullConnSyn(

From d94c206ab98143705fca11c5a310fce7722e62a9 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 17 Jul 2024 20:25:34 +0800
Subject: [PATCH 017/187] =?UTF-8?q?=E2=9C=A8=20modified=20the=20logic=20fo?=
 =?UTF-8?q?r=20generating=20cfg=20frames=20III=20&=20IV?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_template.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index 13ed9dca..fe0653ad 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -385,15 +385,29 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
             )
 
             # 3. Iterate all the neuron segments inside the physical core.
+            # FIXME Unfortunately, at present, only the corresponding NRAM can be written based on
+            # the neuron configurations, and it cannot handle the case where the NRAM address is >= 512,
+            # that is, some neurons need to occupy the NRAM, which is inconsistent with the current logic.
+            # Additional neuron configurations have been written to the NRAM within the CorePlacement.
+            # NOTE The meaning of 'n_neuron' in function 'gen_config_frame3' is the number of neurons in
+            # the NRAM. See notes of function '_weight_ram_mapping' of `CorePlacement` in file
+            # backend/placement.py for details.
             config_frame_type3 = []
             for neu_conf in v.neuron_configs.values():
+                # The actual number of neurons placed in NRAM.
+                _n_neuron_nram = (
+                    HwConfig.ADDR_RAM_MAX + 1
+                    if neu_conf.n_neuron > HwConfig.ADDR_RAM_MAX + 1
+                    else neu_conf.n_neuron
+                )
+
                 config_frame_type3.append(
                     OfflineFrameGen.gen_config_frame3(
                         chip_coord,
                         core_coord,
                         _RID_UNSET,
                         neu_conf.addr_offset,
-                        neu_conf.n_neuron,
+                        _n_neuron_nram,
                         neu_conf.neuron_attrs,
                         neu_conf.neuron_dest_info,
                         lcn_ex=v.params_reg.lcn_extension,
@@ -411,15 +425,16 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                 frame3 = np.array([], dtype=FRAME_DTYPE)
 
             # 4. Only one config frame type IV for each physical core.
-            n_addr_write = v.params_reg.num_dendrite  # The number of address to write
-            if n_addr_write > 0:
+            # NOTE To avoid logical complications, write the entire weights to the WRAM, rather than just the
+            # valid partial weights, because there are still some neuron configurations in the WRAM.
+            if v.params_reg.num_dendrite > 0:
                 config_frame_type4 = OfflineFrameGen.gen_config_frame4(
                     chip_coord,
                     core_coord,
                     _RID_UNSET,
                     0,
-                    18 * n_addr_write,
-                    v.weight_ram[:n_addr_write],
+                    18 * (HwConfig.ADDR_RAM_MAX + 1),
+                    v.weight_ram[: HwConfig.ADDR_RAM_MAX + 1],
                 )
             else:
                 config_frame_type4 = None

From d8bc5fda317ab21f94d812fea5be28bb679cc8e7 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 17 Jul 2024 23:01:11 +0800
Subject: [PATCH 018/187] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20optimize=20empty?=
 =?UTF-8?q?=20core=20placement=20cfg=20generation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_template.py | 41 ++++++++++-----------------------
 paibox/backend/placement.py     | 15 +++++-------
 tests/backend/conftest.py       |  6 -----
 3 files changed, 18 insertions(+), 44 deletions(-)

diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index fe0653ad..34276c8d 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -4,7 +4,7 @@
 from dataclasses import asdict, dataclass
 from enum import Enum
 from pathlib import Path
-from typing import Any, ClassVar, NamedTuple, TypedDict, Union
+from typing import Any, NamedTuple, TypedDict, Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -41,7 +41,7 @@
 from paibox.utils import reverse_8bit
 
 from .context import _BACKEND_CONTEXT
-from .types import AxonCoord, NeuSegment, NodeName
+from .types import WRAMPackedType, AxonCoord, NeuSegment, NodeName
 
 try:
     import orjson
@@ -260,7 +260,7 @@ class CorePlmConfig(NamedTuple):
     """Extra parameters for debugging."""
 
     random_seed: int
-    weight_ram: NDArray[np.uint64]
+    weight_ram: WRAMPackedType
     params_reg: ParamsReg
     neuron_configs: dict[Neuron, NeuronConfig]
 
@@ -268,15 +268,15 @@ class CorePlmConfig(NamedTuple):
     def encapsulate(
         cls,
         random_seed: int,
-        weight_ram: NDArray[np.uint64],
-        core_config: CoreConfig,
-        neuron_configs: dict[Neuron, NeuronConfig],
+        weight_ram: WRAMPackedType,
+        core_cfg: CoreConfig,
+        neuron_cfg: dict[Neuron, NeuronConfig],
     ):
         return cls(
             random_seed,
             weight_ram,
-            ParamsReg.model_validate(core_config._asdict(), strict=True),
-            neuron_configs,
+            ParamsReg.model_validate(core_cfg._asdict(), strict=True),
+            neuron_cfg,
         )
 
     def export(self) -> dict[str, Any]:
@@ -287,11 +287,11 @@ def export(self) -> dict[str, Any]:
             **self.params_reg.model_dump(by_alias=True),
         }
 
-        for neu, neu_config in self.neuron_configs.items():
+        for neu, neu_cfg in self.neuron_configs.items():
             if _USE_ORJSON:
-                dict_["neuron_rams"][neu.name] = orjson.loads(neu_config.to_json())
+                dict_["neuron_rams"][neu.name] = orjson.loads(neu_cfg.to_json())
             else:
-                dict_["neuron_rams"][neu.name] = json.loads(neu_config.to_json())
+                dict_["neuron_rams"][neu.name] = json.loads(neu_cfg.to_json())
 
         return dict_
 
@@ -305,23 +305,6 @@ def to_json(self) -> dict[str, Any]:
         return dict_
 
 
-class EmptyCorePlmConfig(CorePlmConfig):
-    _default_seed: ClassVar[int] = 0
-    _default_zero_wram: ClassVar[NDArray[np.uint64]] = np.zeros(
-        (HwConfig.ADDR_RAM_MAX, 18), dtype=np.uint64
-    )
-    _default_neuron_conf = {}  # don't care
-
-    @classmethod
-    def encapsulate(cls, core_config: CoreConfig):
-        return cls(
-            cls._default_seed,
-            cls._default_zero_wram,
-            ParamsReg.model_validate(core_config._asdict(), strict=True),
-            cls._default_neuron_conf,
-        )
-
-
 InputNodeConf: TypeAlias = dict[NodeName, InputNeuronDest]
 OutputDestConf: TypeAlias = dict[NodeName, dict[CoordAddr, NeuronDestInfo]]
 CorePlmConfInChip: TypeAlias = dict[Coord, CorePlmConfig]
@@ -436,7 +419,7 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                     18 * (HwConfig.ADDR_RAM_MAX + 1),
                     v.weight_ram[: HwConfig.ADDR_RAM_MAX + 1],
                 )
-            else:
+            else:  # empty core placement
                 config_frame_type4 = None
 
             if config_frame_type4:
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index f5893f88..f5e3bd71 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -16,13 +16,7 @@
 from paibox.types import WeightType, WEIGHT_DTYPE
 from paibox.utils import check_attr_same
 
-from .conf_template import (
-    CoreConfig,
-    CoreConfInChip,
-    CorePlmConfig,
-    EmptyCorePlmConfig,
-    NeuronConfig,
-)
+from .conf_template import CoreConfig, CoreConfInChip, CorePlmConfig, NeuronConfig
 from .context import _BACKEND_CONTEXT
 from .segment_utils import aligned_coords, get_axon_segments, get_neu_segments
 from .types import (
@@ -758,6 +752,8 @@ def __len__(self) -> int:
 class EmptyCorePlacement(CoreAbstract):
     """Empty core placement."""
 
+    _EMPTY_WRAM: int = 0
+
     def __init__(self, coord: Coord, name: Optional[str] = None) -> None:
         super().__init__(name)
         self.coord = coord
@@ -783,9 +779,10 @@ def export_param_config(self) -> CoreConfig:
         # fmt: on
         return cb_config
 
-    def export_core_plm_config(self) -> EmptyCorePlmConfig:
+    def export_core_plm_config(self) -> CorePlmConfig:
         core_param = self.export_param_config()
-        return EmptyCorePlmConfig.encapsulate(core_param)
+        # For empty core placements, we don't care about the random seed, WRAM & neuron cfg.
+        return CorePlmConfig.encapsulate(0, self._EMPTY_WRAM, core_param, {})  # type: ignore
 
     @classmethod
     def build(cls, coord: Coord):
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index 0f3adac2..1063ea88 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -22,7 +22,6 @@
 from paibox.backend.conf_template import (
     CoreConfig,
     CorePlmConfig,
-    EmptyCorePlmConfig,
     InputNeuronDest,
     NeuronConfig,
     NeuronDest,
@@ -942,11 +941,6 @@ def MockCorePlmConfig(MockCoreConfigDict, MockNeuronConfig):
     return cpc
 
 
-@pytest.fixture
-def MockEmptyCorePlmConfig(MockCoreConfigDict):
-    return EmptyCorePlmConfig.encapsulate(MockCoreConfigDict)
-
-
 def packbits_ref(bits: np.ndarray, count: int) -> int:
     """Pack unsigned bits into a signed integer.
 

From c2e7dd67e74b34142a1aa0ac856dca56bcaca579 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:15:39 +0800
Subject: [PATCH 019/187] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20bump=20paicorelib?=
 =?UTF-8?q?=20>=3D1.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 poetry.lock    | 322 ++++++++++++++++++++++++++-----------------------
 pyproject.toml |   2 +-
 2 files changed, 171 insertions(+), 153 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 900fd9a5..50ea8f2f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2,13 +2,13 @@
 
 [[package]]
 name = "annotated-types"
-version = "0.6.0"
+version = "0.7.0"
 description = "Reusable constraint types to use with typing.Annotated"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
-    {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
+    {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
+    {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
 ]
 
 [package.source]
@@ -34,13 +34,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "exceptiongroup"
-version = "1.2.1"
+version = "1.2.2"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
-    {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
+    {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
+    {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
 ]
 
 [package.extras]
@@ -119,57 +119,62 @@ reference = "tsinghua"
 
 [[package]]
 name = "orjson"
-version = "3.10.3"
+version = "3.10.6"
 description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "orjson-3.10.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9fb6c3f9f5490a3eb4ddd46fc1b6eadb0d6fc16fb3f07320149c3286a1409dd8"},
-    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:252124b198662eee80428f1af8c63f7ff077c88723fe206a25df8dc57a57b1fa"},
-    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f3e87733823089a338ef9bbf363ef4de45e5c599a9bf50a7a9b82e86d0228da"},
-    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8334c0d87103bb9fbbe59b78129f1f40d1d1e8355bbed2ca71853af15fa4ed3"},
-    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1952c03439e4dce23482ac846e7961f9d4ec62086eb98ae76d97bd41d72644d7"},
-    {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c0403ed9c706dcd2809f1600ed18f4aae50be263bd7112e54b50e2c2bc3ebd6d"},
-    {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:382e52aa4270a037d41f325e7d1dfa395b7de0c367800b6f337d8157367bf3a7"},
-    {file = "orjson-3.10.3-cp310-none-win32.whl", hash = "sha256:be2aab54313752c04f2cbaab4515291ef5af8c2256ce22abc007f89f42f49109"},
-    {file = "orjson-3.10.3-cp310-none-win_amd64.whl", hash = "sha256:416b195f78ae461601893f482287cee1e3059ec49b4f99479aedf22a20b1098b"},
-    {file = "orjson-3.10.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:73100d9abbbe730331f2242c1fc0bcb46a3ea3b4ae3348847e5a141265479700"},
-    {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a12eee96e3ab828dbfcb4d5a0023aa971b27143a1d35dc214c176fdfb29b3"},
-    {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520de5e2ef0b4ae546bea25129d6c7c74edb43fc6cf5213f511a927f2b28148b"},
-    {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccaa0a401fc02e8828a5bedfd80f8cd389d24f65e5ca3954d72c6582495b4bcf"},
-    {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7bc9e8bc11bac40f905640acd41cbeaa87209e7e1f57ade386da658092dc16"},
-    {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3582b34b70543a1ed6944aca75e219e1192661a63da4d039d088a09c67543b08"},
-    {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c23dfa91481de880890d17aa7b91d586a4746a4c2aa9a145bebdbaf233768d5"},
-    {file = "orjson-3.10.3-cp311-none-win32.whl", hash = "sha256:1770e2a0eae728b050705206d84eda8b074b65ee835e7f85c919f5705b006c9b"},
-    {file = "orjson-3.10.3-cp311-none-win_amd64.whl", hash = "sha256:93433b3c1f852660eb5abdc1f4dd0ced2be031ba30900433223b28ee0140cde5"},
-    {file = "orjson-3.10.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a39aa73e53bec8d410875683bfa3a8edf61e5a1c7bb4014f65f81d36467ea098"},
-    {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0943a96b3fa09bee1afdfccc2cb236c9c64715afa375b2af296c73d91c23eab2"},
-    {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e852baafceff8da3c9defae29414cc8513a1586ad93e45f27b89a639c68e8176"},
-    {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18566beb5acd76f3769c1d1a7ec06cdb81edc4d55d2765fb677e3eaa10fa99e0"},
-    {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd2218d5a3aa43060efe649ec564ebedec8ce6ae0a43654b81376216d5ebd42"},
-    {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cf20465e74c6e17a104ecf01bf8cd3b7b252565b4ccee4548f18b012ff2f8069"},
-    {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ba7f67aa7f983c4345eeda16054a4677289011a478ca947cd69c0a86ea45e534"},
-    {file = "orjson-3.10.3-cp312-none-win32.whl", hash = "sha256:17e0713fc159abc261eea0f4feda611d32eabc35708b74bef6ad44f6c78d5ea0"},
-    {file = "orjson-3.10.3-cp312-none-win_amd64.whl", hash = "sha256:4c895383b1ec42b017dd2c75ae8a5b862fc489006afde06f14afbdd0309b2af0"},
-    {file = "orjson-3.10.3-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:be2719e5041e9fb76c8c2c06b9600fe8e8584e6980061ff88dcbc2691a16d20d"},
-    {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0175a5798bdc878956099f5c54b9837cb62cfbf5d0b86ba6d77e43861bcec2"},
-    {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:978be58a68ade24f1af7758626806e13cff7748a677faf95fbb298359aa1e20d"},
-    {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16bda83b5c61586f6f788333d3cf3ed19015e3b9019188c56983b5a299210eb5"},
-    {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ad1f26bea425041e0a1adad34630c4825a9e3adec49079b1fb6ac8d36f8b754"},
-    {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9e253498bee561fe85d6325ba55ff2ff08fb5e7184cd6a4d7754133bd19c9195"},
-    {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0a62f9968bab8a676a164263e485f30a0b748255ee2f4ae49a0224be95f4532b"},
-    {file = "orjson-3.10.3-cp38-none-win32.whl", hash = "sha256:8d0b84403d287d4bfa9bf7d1dc298d5c1c5d9f444f3737929a66f2fe4fb8f134"},
-    {file = "orjson-3.10.3-cp38-none-win_amd64.whl", hash = "sha256:8bc7a4df90da5d535e18157220d7915780d07198b54f4de0110eca6b6c11e290"},
-    {file = "orjson-3.10.3-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9059d15c30e675a58fdcd6f95465c1522b8426e092de9fff20edebfdc15e1cb0"},
-    {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d40c7f7938c9c2b934b297412c067936d0b54e4b8ab916fd1a9eb8f54c02294"},
-    {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4a654ec1de8fdaae1d80d55cee65893cb06494e124681ab335218be6a0691e7"},
-    {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:831c6ef73f9aa53c5f40ae8f949ff7681b38eaddb6904aab89dca4d85099cb78"},
-    {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99b880d7e34542db89f48d14ddecbd26f06838b12427d5a25d71baceb5ba119d"},
-    {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2e5e176c994ce4bd434d7aafb9ecc893c15f347d3d2bbd8e7ce0b63071c52e25"},
-    {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b69a58a37dab856491bf2d3bbf259775fdce262b727f96aafbda359cb1d114d8"},
-    {file = "orjson-3.10.3-cp39-none-win32.whl", hash = "sha256:b8d4d1a6868cde356f1402c8faeb50d62cee765a1f7ffcfd6de732ab0581e063"},
-    {file = "orjson-3.10.3-cp39-none-win_amd64.whl", hash = "sha256:5102f50c5fc46d94f2033fe00d392588564378260d64377aec702f21a7a22912"},
-    {file = "orjson-3.10.3.tar.gz", hash = "sha256:2b166507acae7ba2f7c315dcf185a9111ad5e992ac81f2d507aac39193c2c818"},
+    {file = "orjson-3.10.6-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:fb0ee33124db6eaa517d00890fc1a55c3bfe1cf78ba4a8899d71a06f2d6ff5c7"},
+    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1c4b53b24a4c06547ce43e5fee6ec4e0d8fe2d597f4647fc033fd205707365"},
+    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eadc8fd310edb4bdbd333374f2c8fec6794bbbae99b592f448d8214a5e4050c0"},
+    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61272a5aec2b2661f4fa2b37c907ce9701e821b2c1285d5c3ab0207ebd358d38"},
+    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57985ee7e91d6214c837936dc1608f40f330a6b88bb13f5a57ce5257807da143"},
+    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:633a3b31d9d7c9f02d49c4ab4d0a86065c4a6f6adc297d63d272e043472acab5"},
+    {file = "orjson-3.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1c680b269d33ec444afe2bdc647c9eb73166fa47a16d9a75ee56a374f4a45f43"},
+    {file = "orjson-3.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f759503a97a6ace19e55461395ab0d618b5a117e8d0fbb20e70cfd68a47327f2"},
+    {file = "orjson-3.10.6-cp310-none-win32.whl", hash = "sha256:95a0cce17f969fb5391762e5719575217bd10ac5a189d1979442ee54456393f3"},
+    {file = "orjson-3.10.6-cp310-none-win_amd64.whl", hash = "sha256:df25d9271270ba2133cc88ee83c318372bdc0f2cd6f32e7a450809a111efc45c"},
+    {file = "orjson-3.10.6-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b1ec490e10d2a77c345def52599311849fc063ae0e67cf4f84528073152bb2ba"},
+    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d43d3feb8f19d07e9f01e5b9be4f28801cf7c60d0fa0d279951b18fae1932b"},
+    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac3045267e98fe749408eee1593a142e02357c5c99be0802185ef2170086a863"},
+    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c27bc6a28ae95923350ab382c57113abd38f3928af3c80be6f2ba7eb8d8db0b0"},
+    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d27456491ca79532d11e507cadca37fb8c9324a3976294f68fb1eff2dc6ced5a"},
+    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05ac3d3916023745aa3b3b388e91b9166be1ca02b7c7e41045da6d12985685f0"},
+    {file = "orjson-3.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1335d4ef59ab85cab66fe73fd7a4e881c298ee7f63ede918b7faa1b27cbe5212"},
+    {file = "orjson-3.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4bbc6d0af24c1575edc79994c20e1b29e6fb3c6a570371306db0993ecf144dc5"},
+    {file = "orjson-3.10.6-cp311-none-win32.whl", hash = "sha256:450e39ab1f7694465060a0550b3f6d328d20297bf2e06aa947b97c21e5241fbd"},
+    {file = "orjson-3.10.6-cp311-none-win_amd64.whl", hash = "sha256:227df19441372610b20e05bdb906e1742ec2ad7a66ac8350dcfd29a63014a83b"},
+    {file = "orjson-3.10.6-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ea2977b21f8d5d9b758bb3f344a75e55ca78e3ff85595d248eee813ae23ecdfb"},
+    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6f3d167d13a16ed263b52dbfedff52c962bfd3d270b46b7518365bcc2121eed"},
+    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f710f346e4c44a4e8bdf23daa974faede58f83334289df80bc9cd12fe82573c7"},
+    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7275664f84e027dcb1ad5200b8b18373e9c669b2a9ec33d410c40f5ccf4b257e"},
+    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0943e4c701196b23c240b3d10ed8ecd674f03089198cf503105b474a4f77f21f"},
+    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:446dee5a491b5bc7d8f825d80d9637e7af43f86a331207b9c9610e2f93fee22a"},
+    {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:64c81456d2a050d380786413786b057983892db105516639cb5d3ee3c7fd5148"},
+    {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
+    {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
+    {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
+    {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
+    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
+    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
+    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2c116072a8533f2fec435fde4d134610f806bdac20188c7bd2081f3e9e0133f"},
+    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6eeb13218c8cf34c61912e9df2de2853f1d009de0e46ea09ccdf3d757896af0a"},
+    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:965a916373382674e323c957d560b953d81d7a8603fbeee26f7b8248638bd48b"},
+    {file = "orjson-3.10.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03c95484d53ed8e479cade8628c9cea00fd9d67f5554764a1110e0d5aa2de96e"},
+    {file = "orjson-3.10.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:e060748a04cccf1e0a6f2358dffea9c080b849a4a68c28b1b907f272b5127e9b"},
+    {file = "orjson-3.10.6-cp38-none-win32.whl", hash = "sha256:738dbe3ef909c4b019d69afc19caf6b5ed0e2f1c786b5d6215fbb7539246e4c6"},
+    {file = "orjson-3.10.6-cp38-none-win_amd64.whl", hash = "sha256:d40f839dddf6a7d77114fe6b8a70218556408c71d4d6e29413bb5f150a692ff7"},
+    {file = "orjson-3.10.6-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:697a35a083c4f834807a6232b3e62c8b280f7a44ad0b759fd4dce748951e70db"},
+    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd502f96bf5ea9a61cbc0b2b5900d0dd68aa0da197179042bdd2be67e51a1e4b"},
+    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f215789fb1667cdc874c1b8af6a84dc939fd802bf293a8334fce185c79cd359b"},
+    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2debd8ddce948a8c0938c8c93ade191d2f4ba4649a54302a7da905a81f00b56"},
+    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5410111d7b6681d4b0d65e0f58a13be588d01b473822483f77f513c7f93bd3b2"},
+    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb1f28a137337fdc18384079fa5726810681055b32b92253fa15ae5656e1dddb"},
+    {file = "orjson-3.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bf2fbbce5fe7cd1aa177ea3eab2b8e6a6bc6e8592e4279ed3db2d62e57c0e1b2"},
+    {file = "orjson-3.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:79b9b9e33bd4c517445a62b90ca0cc279b0f1f3970655c3df9e608bc3f91741a"},
+    {file = "orjson-3.10.6-cp39-none-win32.whl", hash = "sha256:30b0a09a2014e621b1adf66a4f705f0809358350a757508ee80209b2d8dae219"},
+    {file = "orjson-3.10.6-cp39-none-win_amd64.whl", hash = "sha256:49e3bc615652617d463069f91b867a4458114c5b104e13b7ae6872e5f79d0844"},
+    {file = "orjson-3.10.6.tar.gz", hash = "sha256:e54b63d0a7c6c54a5f5f726bc93a2078111ef060fec4ecbf34c5db800ca3b3a7"},
 ]
 
 [package.source]
@@ -179,13 +184,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "packaging"
-version = "24.0"
+version = "24.1"
 description = "Core utilities for Python packages"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
-    {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
+    {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
+    {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
 ]
 
 [package.source]
@@ -195,7 +200,7 @@ reference = "tsinghua"
 
 [[package]]
 name = "paicorelib"
-version = "1.1.6"
+version = "1.3.0a1"
 description = "Library of PAICORE 2.0"
 optional = false
 python-versions = "^3.9"
@@ -204,13 +209,13 @@ develop = false
 
 [package.dependencies]
 numpy = "^1.26.0"
-pydantic = "^2.0"
+pydantic = "^2.0.3"
 
 [package.source]
 type = "git"
 url = "https://github.com/PAICookers/PAIlib.git"
 reference = "dev"
-resolved_reference = "81eb16bc8bd9a1d2ebc03d0c868784d47574ae9e"
+resolved_reference = "041f4451c01c1d6710c51eece9fa3e98edffdac4"
 
 [[package]]
 name = "pluggy"
@@ -234,19 +239,22 @@ reference = "tsinghua"
 
 [[package]]
 name = "pydantic"
-version = "2.7.1"
+version = "2.8.2"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"},
-    {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"},
+    {file = "pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8"},
+    {file = "pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a"},
 ]
 
 [package.dependencies]
 annotated-types = ">=0.4.0"
-pydantic-core = "2.18.2"
-typing-extensions = ">=4.6.1"
+pydantic-core = "2.20.1"
+typing-extensions = [
+    {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
+    {version = ">=4.6.1", markers = "python_version < \"3.13\""},
+]
 
 [package.extras]
 email = ["email-validator (>=2.0.0)"]
@@ -258,90 +266,100 @@ reference = "tsinghua"
 
 [[package]]
 name = "pydantic-core"
-version = "2.18.2"
+version = "2.20.1"
 description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"},
-    {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"},
-    {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"},
-    {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"},
-    {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"},
-    {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"},
-    {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"},
-    {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"},
-    {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"},
-    {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"},
-    {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"},
-    {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"},
-    {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"},
-    {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a"},
+    {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840"},
+    {file = "pydantic_core-2.20.1-cp310-none-win32.whl", hash = "sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250"},
+    {file = "pydantic_core-2.20.1-cp310-none-win_amd64.whl", hash = "sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27"},
+    {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b"},
+    {file = "pydantic_core-2.20.1-cp311-none-win32.whl", hash = "sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a"},
+    {file = "pydantic_core-2.20.1-cp311-none-win_amd64.whl", hash = "sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1"},
+    {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd"},
+    {file = "pydantic_core-2.20.1-cp312-none-win32.whl", hash = "sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688"},
+    {file = "pydantic_core-2.20.1-cp312-none-win_amd64.whl", hash = "sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203"},
+    {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0"},
+    {file = "pydantic_core-2.20.1-cp313-none-win32.whl", hash = "sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e"},
+    {file = "pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4745f4ac52cc6686390c40eaa01d48b18997cb130833154801a442323cc78f91"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a8ad4c766d3f33ba8fd692f9aa297c9058970530a32c728a2c4bfd2616d3358b"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41e81317dd6a0127cabce83c0c9c3fbecceae981c8391e6f1dec88a77c8a569a"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04024d270cf63f586ad41fff13fde4311c4fc13ea74676962c876d9577bcc78f"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaad4ff2de1c3823fddf82f41121bdf453d922e9a238642b1dedb33c4e4f98ad"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26ab812fa0c845df815e506be30337e2df27e88399b985d0bb4e3ecfe72df31c"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c5ebac750d9d5f2706654c638c041635c385596caf68f81342011ddfa1e5598"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2aafc5a503855ea5885559eae883978c9b6d8c8993d67766ee73d82e841300dd"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4868f6bd7c9d98904b748a2653031fc9c2f85b6237009d475b1008bfaeb0a5aa"},
+    {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa2f457b4af386254372dfa78a2eda2563680d982422641a85f271c859df1987"},
+    {file = "pydantic_core-2.20.1-cp38-none-win32.whl", hash = "sha256:225b67a1f6d602de0ce7f6c1c3ae89a4aa25d3de9be857999e9124f15dab486a"},
+    {file = "pydantic_core-2.20.1-cp38-none-win_amd64.whl", hash = "sha256:6b507132dcfc0dea440cce23ee2182c0ce7aba7054576efc65634f080dbe9434"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b03f7941783b4c4a26051846dea594628b38f6940a2fdc0df00b221aed39314c"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1eedfeb6089ed3fad42e81a67755846ad4dcc14d73698c120a82e4ccf0f1f9f6"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:635fee4e041ab9c479e31edda27fcf966ea9614fff1317e280d99eb3e5ab6fe2"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:77bf3ac639c1ff567ae3b47f8d4cc3dc20f9966a2a6dd2311dcc055d3d04fb8a"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ed1b0132f24beeec5a78b67d9388656d03e6a7c837394f99257e2d55b461611"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6514f963b023aeee506678a1cf821fe31159b925c4b76fe2afa94cc70b3222b"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d4204d8ca33146e761c79f83cc861df20e7ae9f6487ca290a97702daf56006"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d036c7187b9422ae5b262badb87a20a49eb6c5238b2004e96d4da1231badef1"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9ebfef07dbe1d93efb94b4700f2d278494e9162565a54f124c404a5656d7ff09"},
+    {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6b9d9bb600328a1ce523ab4f454859e9d439150abb0906c5a1983c146580ebab"},
+    {file = "pydantic_core-2.20.1-cp39-none-win32.whl", hash = "sha256:784c1214cb6dd1e3b15dd8b91b9a53852aed16671cc3fbe4786f4f1db07089e2"},
+    {file = "pydantic_core-2.20.1-cp39-none-win_amd64.whl", hash = "sha256:d2fe69c5434391727efa54b47a1e7986bb0186e72a41b203df8f5b0a19a4f669"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99"},
+    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:407653af5617f0757261ae249d3fba09504d7a71ab36ac057c938572d1bc9331"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c693e916709c2465b02ca0ad7b387c4f8423d1db7b4649c551f27a529181c5ad"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b5ff4911aea936a47d9376fd3ab17e970cc543d1b68921886e7f64bd28308d1"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177f55a886d74f1808763976ac4efd29b7ed15c69f4d838bbd74d9d09cf6fa86"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:964faa8a861d2664f0c7ab0c181af0bea66098b1919439815ca8803ef136fc4e"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4dd484681c15e6b9a977c785a345d3e378d72678fd5f1f3c0509608da24f2ac0"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f6d6cff3538391e8486a431569b77921adfcdef14eb18fbf19b7c0a5294d4e6a"},
+    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6d511cc297ff0883bc3708b465ff82d7560193169a8b93260f74ecb0a5e08a7"},
+    {file = "pydantic_core-2.20.1.tar.gz", hash = "sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4"},
 ]
 
 [package.dependencies]
@@ -354,13 +372,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "pytest"
-version = "8.2.0"
+version = "8.2.2"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"},
-    {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"},
+    {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"},
+    {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"},
 ]
 
 [package.dependencies]
@@ -416,13 +434,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "typing-extensions"
-version = "4.11.0"
+version = "4.12.2"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"},
-    {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"},
+    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
 [package.source]
@@ -433,4 +451,4 @@ reference = "tsinghua"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "c4a9fbd1402e36637c204c66ab5fe5defece23de4292189e1c364c0442d345b1"
+content-hash = "ddc3c2f447aeb01635bbdaefe64e188ad015b016d082d956a06e1d73b39dd132"
diff --git a/pyproject.toml b/pyproject.toml
index d0899cf7..adad889e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ include = ["docs/Guide-of-PAIBox.md", "CHANGELOG.md"]
 python = "^3.9"
 pydantic = "^2.0.3"
 numpy = "^1.26.0"
-paicorelib = "^1.1.6"
+paicorelib = "~1.3"
 
 [tool.poetry.group.test]
 optional = true

From acd05acb0d21b565dbd9106db3ea9f123310585b Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:21:43 +0800
Subject: [PATCH 020/187] =?UTF-8?q?=F0=9F=9A=9A=20rename=20`WeightWidth`?=
 =?UTF-8?q?=20&=20related=20ref?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/checker.py                  |  4 +-
 paibox/backend/conf_template.py            |  4 +-
 paibox/backend/placement.py                | 29 ++++++------
 paibox/base.py                             |  4 +-
 paibox/components/synapses/base.py         |  6 +--
 paibox/components/synapses/transforms.py   | 25 +++++-----
 tests/backend/conftest.py                  | 54 +++++++++++-----------
 tests/backend/test_mapper.py               | 10 ++--
 tests/components/synapses/test_synapses.py | 22 ++++-----
 9 files changed, 78 insertions(+), 80 deletions(-)

diff --git a/paibox/backend/checker.py b/paibox/backend/checker.py
index 6439e9e0..b4844e01 100644
--- a/paibox/backend/checker.py
+++ b/paibox/backend/checker.py
@@ -1,5 +1,5 @@
 from paicorelib import LCN_EX
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 __all__ = ["ConfigChecker"]
 
@@ -10,7 +10,7 @@ class _Checker:
 
 class ConfigChecker(_Checker):
     @staticmethod
-    def n_config_estimate(n_neuron: int, wp: WP, lcn_ex: LCN_EX) -> int:
+    def n_config_estimate(n_neuron: int, wp: WW, lcn_ex: LCN_EX) -> int:
         _base = n_neuron * (1 << wp) * (1 << lcn_ex)
 
         n_total = 3 + 3 + (1 + 4 * _base) + (1 + 18 * _base)
diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index 34276c8d..f58a5647 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -25,7 +25,7 @@
     RoutingCoord,
     SNNModeEnable,
     SpikeWidthFormat,
-    WeightPrecision,
+    WeightWidth,
     get_replication_id,
 )
 from paicorelib.framelib import types as flib_types
@@ -98,7 +98,7 @@ class CoreConfig(NamedTuple):
     """Extra parameters for debugging."""
 
     name: str
-    weight_precision: WeightPrecision
+    weight_width: WeightWidth
     lcn_extension: LCN_EX
     input_width_format: InputWidthFormat
     spike_width_format: SpikeWidthFormat
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index f5e3bd71..793090a2 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 from paicorelib import LCN_EX, ChipCoord, Coord, CoreMode, HwConfig, MaxPoolingEnable
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 from paibox.components import FullConnectedSyn, Neuron
 from paibox.exceptions import (
@@ -189,14 +189,14 @@ def n_core_required(self) -> int:
         return len(self.neuron_segs_of_cb)
 
     @property
-    def weight_precision(self) -> WP:
-        # Optimized in `s.weight_precision`.
-        return max(s.weight_precision for s in self.obj)
+    def weight_width(self) -> WW:
+        # `weight_width` is optimized in FullConnectedSyn.
+        return max(s.weight_width for s in self.obj)
 
     @property
     def n_weight_bits(self) -> int:
         """Multiple dendrites will be combined to achieve higher precision weights."""
-        return 1 << self.weight_precision
+        return 1 << self.weight_width
 
     @property
     def lcn_ex(self) -> LCN_EX:
@@ -219,7 +219,7 @@ def n_timeslot(self) -> int:
     @property
     def dendrite_comb_rate(self) -> int:
         """#N of dendrites will be combined."""
-        return self.lcn_ex + self.weight_precision
+        return self.lcn_ex + self.weight_width
 
     @property
     def tws(self) -> int:
@@ -331,7 +331,7 @@ def get_raw_weight_of_coord(self, idx: int) -> list[WeightType]:
     def n_neuron_repl(self) -> int:
         """The number of neurons that need to be repeatedly placed into NRAM.
 
-        For example, in SNN mode, N[0:3] with LCN_2X & WP8:
+        For example, in SNN mode, N[0:3] with LCN_2X & WW8:
             NRAM [0]  [1]  ... [15] [16] [17] ... [31] ...
                  N[0] N[0] ... N[0] N[1] N[1] ... N[1] ...
 
@@ -483,12 +483,12 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
             
             A portion of the fan-in needs to be expanded to an unfilled portion in the direction of the weight      \
             accuracy. At this point, n_fold=n_timeslot/(8/n_weight_bits)=2^(dendrite_comb_rate - 3). For example,   \
-            for LCN_8X & WP8, the n_fold is 3. For LCN_32X & WP4, the n_fold is 4 (instead of 5).
+            for LCN_8X & WW8, the n_fold is 3. For LCN_32X & WW4, the n_fold is 4 (instead of 5).
             
         TODO Now, in ANN mode, only the mapping of 8-bit weights is supported. The weight accuracy optimization is  \
             supposed to disable manually for now.
         """
-        if not self.rt_mode.is_snn and self.weight_precision < WP.WEIGHT_WIDTH_8BIT:
+        if not self.rt_mode.is_snn and self.weight_width < WW.WEIGHT_WIDTH_8BIT:
             raise NotSupportedError("only support 8-bit weights in ANN mode.")
 
         _weights_folded = self._fold_raw_weights(self.raw_weights)
@@ -588,7 +588,7 @@ def export_param_config(self) -> CoreConfig:
         # fmt: off
         cb_config = CoreConfig(
             self.name,                          # name of the core
-            self.weight_precision,              # weight_precision
+            self.weight_width,             # weight_precision
             self.lcn_ex,                        # lcn_extension
             _mode_params[0],                    # input_width_format
             _mode_params[1],                    # spike_width_format
@@ -671,7 +671,6 @@ def export_neu_config(
 
     def export_core_plm_config(self) -> CorePlmConfig:
         core_param = self.export_param_config()
-
         return CorePlmConfig.encapsulate(
             self.parent.seed, self.weight_ram, core_param, self.neu_configs
         )
@@ -679,10 +678,10 @@ def export_core_plm_config(self) -> CorePlmConfig:
     @property
     def shape(self) -> tuple[int, int]:
         return (len(self.source), len(self.dest))
- 
+
     @property
-    def weight_precision(self) -> WP:
-        return self.parent.weight_precision
+    def weight_width(self) -> WW:
+        return self.parent.weight_width
 
     @property
     def n_weight_bits(self) -> int:
@@ -764,7 +763,7 @@ def export_param_config(self) -> CoreConfig:
         # fmt: off
         cb_config = CoreConfig(
             self.name,                          # name of the core
-            WP.WEIGHT_WIDTH_1BIT,               # weight_precision
+            WW.WEIGHT_WIDTH_1BIT,               # weight_precision
             LCN_EX.LCN_1X,                      # lcn_extension
             _mode_params[0],                    # input_width_format
             _mode_params[1],                    # spike_width_format
diff --git a/paibox/base.py b/paibox/base.py
index efe3421a..f0dc8c1a 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -8,7 +8,7 @@
 else:
     from typing_extensions import TypeAlias
 
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 from .collector import Collector
 from .mixin import ReceiveInputProj, StatusMemory, TimeRelatedNode
@@ -296,7 +296,7 @@ def weights(self) -> WeightType:
         raise NotImplementedError
 
     @property
-    def weight_precision(self) -> WP:
+    def weight_width(self) -> WW:
         raise NotImplementedError
 
     @property
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index fe3f75a4..52b09ae1 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 from paicorelib import HwConfig
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 from paibox.base import NeuDyn, SynSys
 from paibox.exceptions import RegisterError, ShapeError
@@ -185,8 +185,8 @@ def weights(self) -> WeightType:
         return self.comm.weights
 
     @property
-    def weight_precision(self) -> WP:
-        return self.comm._get_wp(self.CFLAG_ENABLE_WP_OPTIMIZATION)
+    def weight_width(self) -> WW:
+        return self.comm._get_weight_width(self.CFLAG_ENABLE_WP_OPTIMIZATION)
 
     @property
     def connectivity(self) -> WeightType:
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 78807f2d..9ae205b6 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -3,7 +3,7 @@
 from typing import Literal, Optional
 
 import numpy as np
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 from paibox.exceptions import AutoOptimizationWarning, ShapeError
 from paibox.types import (
@@ -13,7 +13,7 @@
     NeuOutType,
     SynOutType,
     WeightType,
-    VOLTAGE_DTYPE
+    VOLTAGE_DTYPE,
 )
 from paibox.utils import is_shape, shape2num, typical_round
 
@@ -109,22 +109,21 @@ def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
     return _array.astype(_dtype, casting="same_kind")
 
 
-def _get_weight_precision(weight: WeightType, enable_wp_opt: bool) -> WP:
-    """Get the actual weight_precision of the weight."""
-    _max = np.max(weight, axis=None)
-    _min = np.min(weight, axis=None)
+def _get_weight_width_inner(weight: WeightType, enable_wp_opt: bool) -> WW:
+    """Get the actual width of the weight."""
+    _max, _min = np.max(weight), np.min(weight)
 
     if enable_wp_opt:
         if _max <= MAX_INT1 and _min >= MIN_INT1:
-            return WP.WEIGHT_WIDTH_1BIT
+            return WW.WEIGHT_WIDTH_1BIT
         elif _max <= MAX_INT2 and _min >= MIN_INT2:
-            return WP.WEIGHT_WIDTH_2BIT
+            return WW.WEIGHT_WIDTH_2BIT
         elif _max <= MAX_INT4 and _min >= MIN_INT4:
-            return WP.WEIGHT_WIDTH_4BIT
+            return WW.WEIGHT_WIDTH_4BIT
         else:
-            return WP.WEIGHT_WIDTH_8BIT
+            return WW.WEIGHT_WIDTH_8BIT
     else:
-        return WP.WEIGHT_WIDTH_8BIT
+        return WW.WEIGHT_WIDTH_8BIT
 
 
 class Transform:
@@ -140,8 +139,8 @@ def __call__(self, *args, **kwargs) -> SynOutType:
             "function '__call__' must be implemented in the subclasses."
         )
 
-    def _get_wp(self, enable_wp_opt: bool) -> WP:
-        return _get_weight_precision(self.weights, enable_wp_opt)
+    def _get_weight_width(self, enable_wp_opt: bool) -> WW:
+        return _get_weight_width_inner(self.weights, enable_wp_opt)
 
     @property
     def connectivity(self) -> WeightType:
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index 1063ea88..21315b3a 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -15,7 +15,7 @@
     RoutingDirection,
     RoutingLevel,
 )
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 from paicorelib.reg_model import TICK_WAIT_END_MAX, TICK_WAIT_START_MAX
 
 import paibox as pb
@@ -811,7 +811,7 @@ def get_mapper() -> pb.Mapper:
 
 @pytest.fixture
 def MockCoreConfigDict() -> CoreConfig:
-    wp = random.choice(list(WP))
+    wp = random.choice(list(WW))
     lcn_ex = random.choice(list(LCN_EX))
 
     iwf, swf, sme = random.choice(list(CoreMode)).conf
@@ -1276,61 +1276,61 @@ class TestData:
                 ((0, 2), (0, 2)),
                 1,
                 (np.bool_, np.bool_),
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 -1,
                 (np.bool_, np.bool_),
-                WP.WEIGHT_WIDTH_2BIT,
+                WW.WEIGHT_WIDTH_2BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 1,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 -2,
                 (np.int8, np.bool_),
-                WP.WEIGHT_WIDTH_2BIT,
+                WW.WEIGHT_WIDTH_2BIT,
             ),
             (
                 ((0, 2), (0, 2)),
                 1,
                 (np.int8, np.int8),
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
             ),
             (
                 ((0, 2), (-2, 2)),
                 -8,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_4BIT,
+                WW.WEIGHT_WIDTH_4BIT,
             ),
             (
                 ((0, 2), (-2, 2)),
                 7,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_4BIT,
+                WW.WEIGHT_WIDTH_4BIT,
             ),
             (
                 ((0, 2), (-128, 128)),
                 127,
                 (np.bool_, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
+                WW.WEIGHT_WIDTH_8BIT,
             ),
             (
                 ((-2, 2), (-8, 8)),
                 7,
                 (np.int8, np.int8),
-                WP.WEIGHT_WIDTH_4BIT,
+                WW.WEIGHT_WIDTH_4BIT,
             ),
             (
                 ((-8, 8), (-8, 8)),
                 -100,
                 (np.int8, np.int8),
-                WP.WEIGHT_WIDTH_8BIT,
+                WW.WEIGHT_WIDTH_8BIT,
             ),
         ],
     )
@@ -1342,7 +1342,7 @@ class TestData:
             (
                 [_nl[0], _nl[1]],
                 512,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_1X,
                 [
                     [NeuSegment(_nl[0], slice(0, 300, 1), 0)],
@@ -1354,7 +1354,7 @@ class TestData:
             (
                 [_nl[0], _nl[1]],
                 256,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [NeuSegment(_nl[0], slice(0, 200, 1), 0, 2)],
@@ -1369,7 +1369,7 @@ class TestData:
             (
                 [_nl[2]],
                 200,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [NeuSegment(_nl[2], slice(80 * 0, 80 * 1, 1), 0, 2)],
@@ -1381,7 +1381,7 @@ class TestData:
             (
                 [_nl[0], _nl[2]],
                 400,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_1X,
                 [
                     [NeuSegment(_nl[0], slice(0, 300, 1), 0)],
@@ -1393,7 +1393,7 @@ class TestData:
             (
                 [_nl[3], _nl[4]],
                 240,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [NeuSegment(_nl[3], slice(67 * 0, 67 * 1, 1), 0, 2)],
@@ -1415,7 +1415,7 @@ class TestData:
             (
                 [_nc[0], _nc[1]],
                 512,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_1X,
                 [
                     [NeuSegment(_nc[0], slice(0, 512, 1), 0)],
@@ -1429,7 +1429,7 @@ class TestData:
             (
                 [_nc[0], _nc[1]],
                 256,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [NeuSegment(_nc[0], slice(256 * 0, 256 * 1, 1), 0, 2)],
@@ -1446,7 +1446,7 @@ class TestData:
             (
                 [_nc[3], _nc[4]],
                 256,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     # Place the neuron segments with full capacity first
@@ -1460,7 +1460,7 @@ class TestData:
             (
                 [_nc[5], _nc[6]],
                 512,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_1X,
                 [
                     [NeuSegment(_nc[6], slice(0, 500, 1), 0, 1)],
@@ -1477,7 +1477,7 @@ class TestData:
             (
                 [_nb[0], _nb[1]],
                 512,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_1X,
                 [
                     [NeuSegment(_nb[0], slice(0, 300, 1), 0)],
@@ -1489,7 +1489,7 @@ class TestData:
             (
                 [_nb[0], _nb[1]],
                 256,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [NeuSegment(_nb[1], slice(0, 200, 1), 0, 2)],
@@ -1504,7 +1504,7 @@ class TestData:
             (
                 [_nb[2]],
                 200,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [NeuSegment(_nb[2], slice(80 * 0, 80 * 1, 1), 0, 2)],
@@ -1516,7 +1516,7 @@ class TestData:
             (
                 [_nb[2], _nb[3]],
                 200,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [
@@ -1542,7 +1542,7 @@ class TestData:
             (
                 [_nb[2], _nb[3], _nb[4]],
                 256,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [
@@ -1586,7 +1586,7 @@ class TestData:
             (
                 [_nb[3], _nb[4]],
                 240,
-                WP.WEIGHT_WIDTH_1BIT,
+                WW.WEIGHT_WIDTH_1BIT,
                 LCN_EX.LCN_2X,
                 [
                     [
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 21f3cabbc..73e5ea84 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pytest
-from paicorelib import Coord, HwConfig, WeightPrecision as WP
+from paicorelib import Coord, HwConfig, WeightWidth as WW
 
 import paibox as pb
 from paibox.base import SynSys
@@ -576,15 +576,15 @@ def __init__(self):
         mapper = pb.Mapper()
         mapper.build(net)
         mapper.compile(weight_bit_optimization=False)
-        assert mapper.core_blocks[0].weight_precision == WP.WEIGHT_WIDTH_8BIT
+        assert mapper.core_blocks[0].weight_width == WW.WEIGHT_WIDTH_8BIT
 
         mapper.clear()
         mapper.build(net)
         mapper.compile(weight_bit_optimization=True)
-        assert mapper.core_blocks[0].weight_precision == max(
-            s.weight_precision for s in (net.s1, net.s2, net.s3)
+        assert mapper.core_blocks[0].weight_width == max(
+            s.weight_width for s in (net.s1, net.s2, net.s3)
         )
-        assert mapper.core_blocks[0].weight_precision == expected_wp_opt
+        assert mapper.core_blocks[0].weight_width == expected_wp_opt
 
 
 from tests.utils import measure_time
diff --git a/tests/components/synapses/test_synapses.py b/tests/components/synapses/test_synapses.py
index 6378b4de..506963ac 100644
--- a/tests/components/synapses/test_synapses.py
+++ b/tests/components/synapses/test_synapses.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pytest
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 import paibox as pb
 from paibox.components import FullConnectedSyn
@@ -112,13 +112,13 @@ class TestFullConn:
     @pytest.mark.parametrize(
         "n1, n2, scalar_weight, expected_wp",
         [
-            (pb.IF(10, 3), pb.IF(10, 3), 1, WP.WEIGHT_WIDTH_1BIT),
-            (pb.IF((3, 3), 3), pb.IF((3, 3), 3), 4, WP.WEIGHT_WIDTH_4BIT),
-            (pb.IF((5,), 3), pb.IF((5,), 3), -1, WP.WEIGHT_WIDTH_2BIT),
+            (pb.IF(10, 3), pb.IF(10, 3), 1, WW.WEIGHT_WIDTH_1BIT),
+            (pb.IF((3, 3), 3), pb.IF((3, 3), 3), 4, WW.WEIGHT_WIDTH_4BIT),
+            (pb.IF((5,), 3), pb.IF((5,), 3), -1, WW.WEIGHT_WIDTH_2BIT),
             # TODO 3-dimension shape is correct for data flow?
-            (pb.IF((10, 2, 3), 3), pb.IF((10, 2, 3), 3), 16, WP.WEIGHT_WIDTH_8BIT),
-            (pb.IF((10, 2), 3), pb.IF((4, 5), 3), -100, WP.WEIGHT_WIDTH_8BIT),
-            (pb.IF(10, 3), pb.IF((2, 5), 3), 7, WP.WEIGHT_WIDTH_4BIT),
+            (pb.IF((10, 2, 3), 3), pb.IF((10, 2, 3), 3), 16, WW.WEIGHT_WIDTH_8BIT),
+            (pb.IF((10, 2), 3), pb.IF((4, 5), 3), -100, WW.WEIGHT_WIDTH_8BIT),
+            (pb.IF(10, 3), pb.IF((2, 5), 3), 7, WW.WEIGHT_WIDTH_4BIT),
         ],
     )
     def test_FullConn_One2One_scalar(self, n1, n2, scalar_weight, expected_wp):
@@ -131,7 +131,7 @@ def test_FullConn_One2One_scalar(self, n1, n2, scalar_weight, expected_wp):
             scalar_weight * np.identity(n1.num_out, dtype=WEIGHT_DTYPE),
         )
         assert s1.connectivity.dtype == WEIGHT_DTYPE
-        assert s1.weight_precision is expected_wp
+        assert s1.weight_width is expected_wp
 
     @pytest.mark.parametrize(
         "n1, n2",
@@ -158,7 +158,7 @@ def test_FullConn_One2One_matrix(self):
             s1.connectivity, np.array([[2, 0, 0], [0, 3, 0], [0, 0, 4]], dtype=np.int8)
         )
         assert s1.connectivity.dtype == WEIGHT_DTYPE
-        assert s1.weight_precision is WP.WEIGHT_WIDTH_4BIT
+        assert s1.weight_width is WW.WEIGHT_WIDTH_4BIT
 
         weight = np.array([1, 0, 1, 0], np.int8)
         s2 = pb.FullConn(
@@ -174,7 +174,7 @@ def test_FullConn_One2One_matrix(self):
             ),
         )
         assert s2.connectivity.dtype == WEIGHT_DTYPE
-        assert s2.weight_precision is WP.WEIGHT_WIDTH_1BIT
+        assert s2.weight_width is WW.WEIGHT_WIDTH_1BIT
 
     @pytest.mark.parametrize(
         "n1, n2",
@@ -204,7 +204,7 @@ def test_FullConn_All2All_with_weights(self):
 
         assert np.array_equal(s1.weights, weight)
         assert s1.connectivity.dtype == WEIGHT_DTYPE
-        assert s1.weight_precision is WP.WEIGHT_WIDTH_4BIT
+        assert s1.weight_width is WW.WEIGHT_WIDTH_4BIT
 
         """2. Weights matrix."""
         weight = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

From 67054d821819d9059748eb3fd518c223f7897dc9 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:30:52 +0800
Subject: [PATCH 021/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20ANN=20p?=
 =?UTF-8?q?lacement?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_placement.py | 249 ++++++++++++++++++++------------
 1 file changed, 155 insertions(+), 94 deletions(-)

diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 6febdb32..7be18952 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -1,11 +1,13 @@
 import numpy as np
 import pytest
-from paicorelib import LCN_EX
-from paicorelib import WeightPrecision as WP
+from paicorelib import HwConfig, LCN_EX
+from paicorelib import WeightWidth as WW
 
 import paibox as pb
-from paibox.backend.types import NeuSegment
+from paibox.backend.placement import CorePlacement
+from paibox.backend.types import NeuSegment, WRAMUnpackedType, WRAM_PACKED_DTYPE
 from paibox.exceptions import ResourceError
+from paibox.types import WEIGHT_DTYPE, WeightType
 
 
 def packbits_ref(bits: np.ndarray, count: int) -> int:
@@ -21,10 +23,9 @@ def packbits_ref(bits: np.ndarray, count: int) -> int:
     return result
 
 
-def test_get_raw_weight_ref():
-    rng = np.random.RandomState(seed=1)
-    w1 = rng.randint(-128, 128, size=(10, 20), dtype=np.int8)
-    w2 = rng.randint(-128, 128, size=(10, 30), dtype=np.int8)
+def test_get_raw_weight_ref(random_fixture):
+    w1 = np.random.randint(-128, 128, size=(10, 20), dtype=WEIGHT_DTYPE)
+    w2 = np.random.randint(-128, 128, size=(10, 30), dtype=WEIGHT_DTYPE)
 
     w_of_neurons = [w1, w2]
 
@@ -97,9 +98,8 @@ def test_weight_ram_mapping(input, n_col_groups, expected):
     This is a test of the prototype of the original function.
     """
     cur_shape = input.shape
-    expected_shape = expected.shape
-    row, col = expected.shape
-    o_matrix = np.zeros(expected_shape, dtype=np.int8)
+    row, _ = expected.shape
+    o_matrix = np.zeros(expected.shape, dtype=np.int8)
 
     for i in range(cur_shape[1]):
         w_col = input[:, i]
@@ -111,8 +111,6 @@ def test_weight_ram_mapping(input, n_col_groups, expected):
             ]
             col_group += 1
 
-            print(o_matrix)
-
         o_matrix[:, n_col_groups * i + col_group] = np.pad(
             w_col[row * col_group :],
             pad_width=(0, row - n_rest_axon),
@@ -120,20 +118,18 @@ def test_weight_ram_mapping(input, n_col_groups, expected):
             constant_values=0,
         )
 
-        print(o_matrix)
-
     assert np.array_equal(o_matrix, expected)
 
 
 def test_nfold_weight_ref():
-    original_matrix = np.arange(1, 25, dtype=np.int8).reshape(8, 3)
+    original_matrix = np.arange(1, 25, dtype=WEIGHT_DTYPE).reshape(8, 3)
     nfold = 3
 
     if original_matrix.shape[0] % nfold > 0:
         _padding = nfold - original_matrix.shape[0] % nfold
         w_padding = np.append(
             original_matrix,
-            values=np.zeros((_padding, original_matrix.shape[1]), dtype=np.int8),
+            values=np.zeros((_padding, original_matrix.shape[1]), dtype=WEIGHT_DTYPE),
             axis=0,
         )
     else:
@@ -142,7 +138,8 @@ def test_nfold_weight_ref():
     split = np.vsplit(w_padding, nfold)
 
     result = np.zeros(
-        (w_padding.shape[0] // nfold, original_matrix.shape[1] * nfold), dtype=np.int8
+        (w_padding.shape[0] // nfold, original_matrix.shape[1] * nfold),
+        dtype=WEIGHT_DTYPE,
     )
 
     for i, j in np.ndindex((nfold, original_matrix.shape[1])):
@@ -157,7 +154,7 @@ def test_nfold_weight_ref():
                 [4, 13, 22, 5, 14, 23, 6, 15, 24],
                 [7, 16, 0, 8, 17, 0, 9, 18, 0],
             ],
-            dtype=np.int8,
+            dtype=WEIGHT_DTYPE,
         ),
     )
 
@@ -166,17 +163,15 @@ class TestWeightUnpack:
     @pytest.mark.parametrize(
         "wp",
         [
-            WP.WEIGHT_WIDTH_8BIT,
-            WP.WEIGHT_WIDTH_4BIT,
-            WP.WEIGHT_WIDTH_2BIT,
-            WP.WEIGHT_WIDTH_1BIT,
+            WW.WEIGHT_WIDTH_8BIT,
+            WW.WEIGHT_WIDTH_4BIT,
+            WW.WEIGHT_WIDTH_2BIT,
+            WW.WEIGHT_WIDTH_1BIT,
         ],
     )
     def test_signed_unpackbits(self, wp):
         count = 1 << wp
-        actual_array = np.arange(
-            -(1 << (count - 1)), (1 << (count - 1)), 1, dtype=np.int8
-        )
+        actual_array = np.arange(-(1 << (count - 1)), (1 << (count - 1)), dtype=np.int8)
 
         for actual_signed in actual_array:
             unpacked = np.unpackbits(
@@ -203,20 +198,26 @@ def test_uint8_unpackbits_scalar(self):
         assert np.array_equal(y2, np.array([1, 0, 1, 0, 0, 1, 1, 1], dtype=np.uint8))
 
     @pytest.mark.parametrize(
-        "shape, wp, nfold",
+        "shape, wp, nfold, is_iw8",
         [
-            ((8, 8), WP.WEIGHT_WIDTH_8BIT, 2),
-            ((32, 32), WP.WEIGHT_WIDTH_8BIT, 2),
-            ((16, 16), WP.WEIGHT_WIDTH_4BIT, 4),
-            ((30, 24), WP.WEIGHT_WIDTH_4BIT, 4),
-            ((32, 24), WP.WEIGHT_WIDTH_2BIT, 3),
-            ((32, 24), WP.WEIGHT_WIDTH_1BIT, 3),
-            ((31, 23), WP.WEIGHT_WIDTH_8BIT, 5),
-            ((1200, 200), WP.WEIGHT_WIDTH_1BIT, 2),
-            ((800, 64), WP.WEIGHT_WIDTH_8BIT, 2),
+            ((8, 8), WW.WEIGHT_WIDTH_8BIT, 2, False),
+            ((32, 32), WW.WEIGHT_WIDTH_8BIT, 2, False),
+            ((16, 16), WW.WEIGHT_WIDTH_4BIT, 4, False),
+            ((30, 24), WW.WEIGHT_WIDTH_4BIT, 4, False),
+            ((32, 24), WW.WEIGHT_WIDTH_2BIT, 3, False),
+            ((32, 24), WW.WEIGHT_WIDTH_1BIT, 3, False),
+            ((31, 23), WW.WEIGHT_WIDTH_8BIT, 5, False),
+            ((1200, 200), WW.WEIGHT_WIDTH_1BIT, 2, False),
+            ((800, 64), WW.WEIGHT_WIDTH_8BIT, 2, False),
+            ((8, 8), WW.WEIGHT_WIDTH_8BIT, 2, True),
+            ((32, 32), WW.WEIGHT_WIDTH_8BIT, 2, True),
+            ((16, 16), WW.WEIGHT_WIDTH_4BIT, 4, True),
+            ((200, 32), WW.WEIGHT_WIDTH_8BIT, 2, True),
+            ((30, 24), WW.WEIGHT_WIDTH_4BIT, 4, True),
+            ((32, 24), WW.WEIGHT_WIDTH_2BIT, 3, True),
         ],
     )
-    def test_weight_ram_mapping(self, shape, wp, nfold):
+    def test_weight_ram_mapping(self, shape, wp, nfold, is_iw8):
         nbit = 1 << wp
 
         if shape[0] % nfold > 0:
@@ -229,37 +230,68 @@ def test_weight_ram_mapping(self, shape, wp, nfold):
         # Generate the original weight with shape
         _low = 0 if nbit == 1 else -(1 << (nbit - 1))
         _high = 1 << (nbit - 1)
-        array = np.random.randint(_low, _high, size=shape, dtype=np.int8)
+        test_weight = np.random.randint(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
 
         # 1. Fold, return the folded weight after padding.
-        w_folded = self._fold_raw_weight_ref(array, expected_shape[0], nfold)
+        w_folded = self._nfold_weight_ref(test_weight, expected_shape[0], nfold)
 
         # 2. Unpack, get the weight ram.
-        if nbit > 1:
-            w_unpacked = self._weight_ram_mapping_ref(w_folded, nbit)
-        else:
-            w_unpacked = w_folded.astype(np.bool_)
-
+        # The real interval is HwConfig.N_FANIN_PER_DENDRITE_ANN
+        _fake_interval = w_folded.shape[0] * 2
+        w_unpacked = self._weight_ram_mapping_ref(
+            w_folded, nbit, is_iw8, _fake_interval
+        )
         w_unpacked.setflags(write=False)
 
         # 3. Check
-        for i, j in np.ndindex(shape):
-            n_in_col = w_folded.shape[0]
-            now_i = i % n_in_col
+        self._check(
+            test_weight, w_folded, w_unpacked, nbit, nfold, is_iw8, _fake_interval
+        )
 
-            offset_j = i // n_in_col
-            now_j = offset_j + j * nfold
+    @staticmethod
+    def _nfold_weight_ref(raw_weight: WeightType, expected_row: int, nfold: int):
+        raw_row, raw_col = raw_weight.shape
 
-            expected = array[i, j]
-            wij = w_unpacked[now_i, now_j * nbit : (now_j + 1) * nbit]
-            packed = packbits_ref(wij, nbit)
+        if raw_row % nfold > 0:
+            _padding = nfold - raw_row % nfold
+            assert expected_row * nfold == raw_row + _padding
 
-            assert expected == packed
+            w_padding = np.append(
+                raw_weight,
+                values=np.zeros((_padding, raw_col), dtype=WEIGHT_DTYPE),
+                axis=0,
+            )
+        else:
+            w_padding = raw_weight
+
+        split = np.vsplit(w_padding, nfold)
+        w_folded = np.zeros((expected_row, raw_col * nfold), dtype=WEIGHT_DTYPE)
+
+        for i, j in np.ndindex((nfold, raw_col)):
+            w_col = split[i][:, j]
+            w_folded[:, j * nfold + i] = w_col
+
+        return w_folded
 
     @staticmethod
-    def _weight_ram_mapping_ref(folded_weights: np.ndarray, n_bit: int):
+    def _weight_ram_mapping_ref(
+        folded_weights: WeightType,
+        n_bit: int,
+        is_iw8: bool,
+        fake_interval: int,
+    ):
         row, col = folded_weights.shape
-        result = np.zeros((row, col * n_bit), dtype=np.uint8)
+        # if iw = 1, the row of result is the same as the row of folded_weights
+        if not is_iw8:
+            result_row = row
+        else:
+            result_row = 8 * fake_interval
+
+        result = np.zeros((result_row, col * n_bit), dtype=np.uint8)
+
+        if n_bit == 1:
+            result[:row, :col] = folded_weights
+            return result
 
         # [N*M] -> [M*N*1]
         folded_weights_3d = np.expand_dims(folded_weights.T, axis=2).astype(np.uint8)
@@ -270,25 +302,68 @@ def _weight_ram_mapping_ref(folded_weights: np.ndarray, n_bit: int):
                 folded_weights_3d[i], axis=1, count=n_bit, bitorder="little"
             )
 
-            result[:, n_bit * i : n_bit * (i + 1)] = unpacked
+            if not is_iw8:
+                result[:row, n_bit * i : n_bit * (i + 1)] = unpacked
+            else:
+                for bit in range(n_bit):
+                    result[bit * fake_interval : bit * fake_interval + row, i] = (
+                        unpacked[:, bit]
+                    )
 
         assert np.max(result, axis=None) <= 1
         assert np.min(result, axis=None) >= 0
 
         return result
 
-    def test_packbits_to_mapping_form(self):
+    @staticmethod
+    def _check(
+        test_data: WeightType,
+        w_folded: WeightType,
+        w_unpacked: WRAMUnpackedType,
+        nbit: int,
+        nfold: int,
+        is_iw8: bool,
+        fake_interval: int = 0,
+    ) -> None:
+        """Assert every raw weight is recoverable from the unpacked weight RAM."""
+        n_in_col = w_folded.shape[0]  # loop-invariant: rows of the folded matrix
+        for i, j in np.ndindex(test_data.shape):
+            # Locate raw element (i, j) inside the folded matrix.
+            offset_j, now_i = divmod(i, n_in_col)
+            now_j = offset_j + j * nfold
+
+            if not is_iw8:
+                wij = w_unpacked[now_i, now_j * nbit : (now_j + 1) * nbit]
+            else:
+                # Successive bits of one weight are `fake_interval` rows apart,
+                # ordered from LSB to MSB.
+                rows = [bit * fake_interval + now_i for bit in range(nbit)]
+                wij = np.asarray([w_unpacked[r, now_j] for r in rows], dtype=np.uint8)
+
+            wij_packed = packbits_ref(wij, nbit)
+            assert test_data[i, j] == wij_packed
+
+    def test_CorePlacement_weight_pack_shape(self):
+        # Mock unpacked weight
+        w_unpacked = np.zeros(CorePlacement.WRAM_BASE_SHAPE, dtype=np.uint8)
+        w_packed_u64 = CorePlacement._weight_pack(w_unpacked)
+
+        assert w_packed_u64.shape == (
+            (HwConfig.ADDR_RAM_MAX + 1),
+            (HwConfig.ADDR_AXON_MAX + 1) // (WRAM_PACKED_DTYPE(1).nbytes * 8),
+        )
+
+    def test_packbits_to_mapping_form(self, random_fixture):
         def _weight_ram_T(weight_ram_mapped: np.ndarray):
             _w = weight_ram_mapped.T.reshape(-1, 64)
             w_packed_u8 = np.packbits(_w, axis=-1, bitorder="little")
 
             return w_packed_u8
 
-        rng = np.random.RandomState(42)
-        w = rng.randint(-8, 8, size=(1152, 64), dtype=np.int8)
+        w = np.random.randint(-8, 8, size=(1152, 64), dtype=WEIGHT_DTYPE)
 
         # 1152 * 512
-        w1 = self._weight_ram_mapping_ref(w, 8)
+        w1 = self._weight_ram_mapping_ref(w, 8, False, 0)
 
         # -> 512 * 1152 -> 512 * 144 (uint8)
         wT = _weight_ram_T(w1)
@@ -297,38 +372,11 @@ def _weight_ram_T(weight_ram_mapped: np.ndarray):
         ww.setflags(write=False)
         assert 1
 
-    @staticmethod
-    def _fold_raw_weight_ref(raw_weight: np.ndarray, expected_row: int, nfold: int):
-        raw_row, raw_col = raw_weight.shape
-
-        if raw_row % nfold > 0:
-            _padding = nfold - raw_row % nfold
-            assert expected_row * nfold == raw_row + _padding
-
-            w_padding = np.append(
-                raw_weight,
-                values=np.zeros((_padding, raw_col), dtype=np.int8),
-                axis=0,
-            )
-        else:
-            w_padding = raw_weight.copy()
-
-        split = np.vsplit(w_padding, nfold)
-        assert w_padding.shape[0] == expected_row * nfold
-
-        w_folded = np.zeros((expected_row, raw_col * nfold), dtype=np.int8)
-
-        for i, j in np.ndindex((nfold, raw_col)):
-            w_col = split[i][:, j]
-            w_folded[:, j * nfold + i] = w_col
-
-        return w_folded
-
     def test_weight_ram_mapping_8bits(self, packbits8):
         binary_conn = np.zeros((6, 8 * 5), dtype=np.bool_)
-        wp = WP.WEIGHT_WIDTH_8BIT
+        wp = WW.WEIGHT_WIDTH_8BIT
 
-        array = np.random.randint(-128, 128, size=(4, 4), dtype=np.int8)
+        array = np.random.randint(-128, 128, size=(4, 4), dtype=WEIGHT_DTYPE)
 
         y = np.unpackbits(np.uint8(array), axis=1, bitorder="little")
         assert y.shape == (4, (1 << wp) * 4)
@@ -344,9 +392,9 @@ def test_weight_ram_mapping_8bits(self, packbits8):
 
     def test_weight_ram_mapping_4bits(self, packbits4):
         binary_conn = np.zeros((6, 4 * 5), dtype=np.bool_)
-        wp = WP.WEIGHT_WIDTH_4BIT
+        wp = WW.WEIGHT_WIDTH_4BIT
 
-        array = np.random.randint(-8, 8, size=(4, 4), dtype=np.int8)
+        array = np.random.randint(-8, 8, size=(4, 4), dtype=WEIGHT_DTYPE)
         y = np.zeros((4, 16), dtype=np.uint8)
 
         for i in range(4):
@@ -367,9 +415,9 @@ def test_weight_ram_mapping_4bits(self, packbits4):
 
     def test_weight_ram_mapping_2bits(self, packbits2):
         binary_conn = np.zeros((6, 4 * 5), dtype=np.bool_)
-        wp = WP.WEIGHT_WIDTH_2BIT
+        wp = WW.WEIGHT_WIDTH_2BIT
 
-        array = np.random.randint(-2, 2, size=(4, 4), dtype=np.int8)
+        array = np.random.randint(-2, 2, size=(4, 4), dtype=WEIGHT_DTYPE)
         y = np.zeros((4, 8), dtype=np.uint8)
 
         for i in range(4):
@@ -392,8 +440,21 @@ def test_weight_ram_mapping_2bits(self, packbits2):
 def test_n_axon2lcn_ex():
     from .conftest import n_axon2lcn_ex_proto
 
-    lcn_ex = n_axon2lcn_ex_proto(1152 * 18 + 1, 1152)
+    lcn_ex = n_axon2lcn_ex_proto(
+        HwConfig.N_FANIN_PER_DENDRITE_SNN * 18 + 1, HwConfig.N_FANIN_PER_DENDRITE_SNN
+    )
     assert lcn_ex == LCN_EX.LCN_32X
 
+    lcn_ex = n_axon2lcn_ex_proto(
+        HwConfig.N_FANIN_PER_DENDRITE_ANN * 3 + 20, HwConfig.N_FANIN_PER_DENDRITE_ANN
+    )
+    assert lcn_ex == LCN_EX.LCN_4X
+
+    with pytest.raises(ValueError):
+        lcn_ex = n_axon2lcn_ex_proto(0, HwConfig.N_FANIN_PER_DENDRITE_SNN)
+
     with pytest.raises(ResourceError):
-        lcn_ex = n_axon2lcn_ex_proto(1152 * 64 + 1, 1152)
+        lcn_ex = n_axon2lcn_ex_proto(
+            (HwConfig.N_FANIN_PER_DENDRITE_SNN << LCN_EX.LCN_64X) + 1,
+            HwConfig.N_FANIN_PER_DENDRITE_SNN,
+        )

From 9fbb3ba1116586c2efecb653c266743d1a83c5d6 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:32:24 +0800
Subject: [PATCH 022/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20ANN=20m?=
 =?UTF-8?q?apping=20&=20remove=20unused=20weight4=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_mapper.py | 108 +++++------------------------------
 tests/shared_networks.py     |  58 ++++++++++++++++++-
 2 files changed, 71 insertions(+), 95 deletions(-)

diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 73e5ea84..516e89b6 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -122,15 +122,26 @@ def test_nested_net_L2_compile(self, get_mapper, build_Nested_Net_level_2):
         assert len(mapper.graph_info["output"]) == 1
 
     def test_nested_net_L3_compile(self, get_mapper, build_Nested_Net_level_3):
-        net2 = build_Nested_Net_level_3
+        net = build_Nested_Net_level_3
         mapper: pb.Mapper = get_mapper
-        mapper.build(net2)
+        mapper.build(net)
         mapper.compile()
 
         assert len(mapper.graph.edges.keys()) == 5
         assert len(mapper.graph_info["input"]) == 2
         assert len(mapper.graph_info["output"]) == 1
 
+    def test_ANN_network_compile(
+        self, get_mapper, build_ANN_Network_1, ensure_dump_dir
+    ):
+        net = build_ANN_Network_1
+        mapper: pb.Mapper = get_mapper
+        mapper.build(net)
+        mapper.compile()
+        mapper.export(fp=ensure_dump_dir, export_core_params=True)
+
+        assert 1
+
 
 class TestMapperDeployment:
     def test_build_graph(self, get_mapper, build_example_net1, build_example_net2):
@@ -331,104 +342,13 @@ def test_export_empty_cplm(self, build_example_net4_large_scale, ensure_dump_dir
         assert len(mapper.routing_groups[1].wasted_coords) == 2
 
 
-class TestMapper_Weight4:
-    @pytest.mark.skipif(
-        hasattr(SynSys, "CFLAG_ENABLE_WP_OPTIMIZATION"), reason="Breaking change"
-    )
-    def test_mapper_weight4(
-        self, monkeypatch, ensure_dump_dir, build_network_with_branches_4bit, packbits8
-    ):
-        # Use monkey patch to change the settings of `HwConfig` when running the test.
-        monkeypatch.setattr(HwConfig, "N_DENDRITE_MAX_SNN", 8 * 8)
-        monkeypatch.setattr(HwConfig, "N_FANIN_PER_DENDRITE_SNN", 6)
-
-        net = build_network_with_branches_4bit
-
-        mapper = pb.Mapper()
-        mapper.build(net)
-        mapper.compile()
-
-        configs = mapper.export(write_to_file=False, fp=ensure_dump_dir, format="npy")
-
-        assert mapper.n_core_required == 11
-
-        from paibox.backend.checker import ConfigChecker
-
-        cplm00 = mapper.core_blocks[0].core_placements[Coord(0, 0)]
-        cplm01 = mapper.core_blocks[0].core_placements[Coord(0, 1)]
-        cplm10 = mapper.core_blocks[0].core_placements[Coord(1, 0)]
-
-        n_config_core00 = ConfigChecker.n_config_estimate(
-            cplm00.n_neuron, cplm00.weight_precision, cplm00.lcn_ex
-        )
-        n_config_core01 = ConfigChecker.n_config_estimate(
-            cplm01.n_neuron, cplm01.weight_precision, cplm01.lcn_ex
-        )
-        n_config_core10 = ConfigChecker.n_config_estimate(
-            cplm10.n_neuron, cplm10.weight_precision, cplm10.lcn_ex
-        )
-
-        assert n_config_core00 == configs[Coord(0, 0)].size
-        assert n_config_core01 == configs[Coord(0, 1)].size
-        assert n_config_core10 == configs[Coord(1, 0)].size
-
-        # The #N of config frames of each core.
-
-        original_w1 = net.s1.connectivity
-        original_w2 = net.s2.connectivity
-        original_w3 = net.s3.connectivity
-        original_w4 = net.s4.connectivity
-        original_w5 = net.s5.connectivity
-
-        # Folded weight of s1
-        w11_folded = mapper.core_blocks[0].core_placements[Coord(0, 0)]._weights_folded
-        w12_folded = mapper.core_blocks[0].core_placements[Coord(0, 1)]._weights_folded
-        w13_folded = mapper.core_blocks[0].core_placements[Coord(1, 0)]._weights_folded
-
-        # Splited & folded weight of s2 & s3
-        w21_folded = mapper.core_blocks[1].core_placements[Coord(2, 0)]._weights_folded
-        w22_folded = mapper.core_blocks[1].core_placements[Coord(2, 1)]._weights_folded
-        w23_folded = mapper.core_blocks[1].core_placements[Coord(3, 0)]._weights_folded
-        w24_folded = mapper.core_blocks[1].core_placements[Coord(3, 1)]._weights_folded
-        w25_folded = mapper.core_blocks[1].core_placements[Coord(2, 2)]._weights_folded
-        w26_folded = mapper.core_blocks[1].core_placements[Coord(2, 3)]._weights_folded
-
-        # Splited & folded weight of s4 & 5
-        w31_folded = mapper.core_blocks[2].core_placements[Coord(0, 2)]._weights_folded
-        w32_folded = mapper.core_blocks[2].core_placements[Coord(0, 3)]._weights_folded
-
-        # Unpacked weight of s1
-        w11_unpacked = mapper.core_blocks[0].core_placements[Coord(0, 0)].weight_ram
-        w12_unpacked = mapper.core_blocks[0].core_placements[Coord(0, 1)].weight_ram
-        w13_unpacked = mapper.core_blocks[0].core_placements[Coord(1, 0)].weight_ram
-
-        for i in range(10):
-            for j in range(4):
-                n_in_col = w11_folded.shape[0]
-                now_i = i % n_in_col
-
-                offset_j = i // n_in_col
-                now_j = offset_j + j * 2
-
-                expected = original_w1[i, j]
-                wij = w11_folded[now_i, now_j]
-
-                assert expected == wij
-
-                # wij = w11_folded[now_i, now_j * 8 : (now_j + 1) * 8]
-                # packed = packbits8(wij)
-                # assert expected == packed
-
-        print("OK")
-
-
 class TestMapper_Compile:
     def test_grouping_optim_latency(
         self, monkeypatch, build_Network_8bit_dense, ensure_dump_dir
     ):
         from paibox.backend.conf_template import export_core_plm_conf_json
 
-        monkeypatch.setattr(HwConfig, "N_DENDRITE_MAX_SNN", 8 * 8)
+        monkeypatch.setattr(HwConfig, "N_NEURON_MAX_SNN", 8 * 8)
         monkeypatch.setattr(HwConfig, "N_FANIN_PER_DENDRITE_SNN", 6)
 
         net = build_Network_8bit_dense
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 0c1d5d30..876a95a3 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -1,5 +1,5 @@
 from typing import Literal
-
+import numpy as np
 import pytest
 
 import paibox as pb
@@ -216,6 +216,57 @@ def __init__(self, shape, axes):
         self.probe2 = pb.Probe(self.n2, "spike")
 
 
+class ANNNetwork(pb.Network):
+    def __init__(self):
+        super().__init__()
+        self.inp1 = pb.InputProj(input=_out_bypass1, shape_out=(32, 32))
+
+        n1_bias = np.random.randint(-128, 128, size=(4,), dtype=np.int8)
+        self.n1 = pb.LIF(
+            (4, 30, 30),
+            100,
+            bias=n1_bias,
+            tick_wait_start=1,
+            input_width=8,
+            spike_width=8,
+            snn_en=False,
+        )
+        n2_bias = np.random.randint(-128, 128, size=(4,), dtype=np.int8)
+        self.n2 = pb.LIF(
+            (4, 28, 28),
+            50,
+            bias=n2_bias,
+            tick_wait_start=2,
+            input_width=8,
+            spike_width=8,
+            snn_en=False,
+        )
+        self.n3 = pb.LIF(
+            (2, 26, 26),
+            20,
+            bias=1,
+            tick_wait_start=3,
+            input_width=8,
+            spike_width=8,
+            snn_en=False,
+        )
+        self.n4 = pb.IF(
+            (100,), 10, tick_wait_start=4, input_width=8, spike_width=8, snn_en=False
+        )
+
+        kernel_1 = np.random.randint(-128, 128, size=(4, 1, 3, 3), dtype=np.int8)
+        self.conv2d_1 = pb.Conv2d(self.inp1, self.n1, kernel_1)
+
+        kernel_2 = np.random.randint(-128, 128, size=(4, 4, 3, 3), dtype=np.int8)
+        self.conv2d_2 = pb.Conv2d(self.n1, self.n2, kernel_2)
+
+        kernel_3 = np.random.randint(-128, 128, size=(2, 4, 3, 3), dtype=np.int8)
+        self.conv2d_3 = pb.Conv2d(self.n2, self.n3, kernel_3)
+
+        w4 = np.random.randint(-128, 128, size=(2 * 26 * 26, 100), dtype=np.int8)
+        self.fc1 = pb.FullConn(self.n3, self.n4, w4)
+
+
 @pytest.fixture(scope="class")
 def build_BitwiseAND_Net():
     return FunctionalModule_2to1_Net("and")
@@ -264,3 +315,8 @@ def build_FModule_ConnWithModule_Net():
 @pytest.fixture(scope="class")
 def build_FModule_ConnWithFModule_Net():
     return FModule_ConnWithFModule_Net()
+
+
+@pytest.fixture(scope="class")
+def build_ANN_Network_1():
+    return ANNNetwork()

From d2e50835f4f57ecbfc189abd119a25d855c6337d Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:32:55 +0800
Subject: [PATCH 023/187] =?UTF-8?q?=E2=9C=85=20update=20tests=20with=20rea?=
 =?UTF-8?q?son=20for=20deprecation=20skip=20&=20fix=20random=20state?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 8 ++++----
 tests/simulator/test_encoder.py     | 5 ++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 9913fb11..b50ee82a 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -551,7 +551,7 @@ def test_SpikingPool2dWithV_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
-    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"))
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     @pytest.mark.parametrize("shape", [(32, 16), (1, 32), (64,), (128, 1), 48])
     def test_Transpose2d(self, shape):
         from tests.shared_networks import TransposeModule_T2d_Net
@@ -582,7 +582,7 @@ def test_Transpose2d(self, shape):
             expected = inpa[i - 2].T.ravel()
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
 
-    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"))
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     def test_Transpose2d_mapping(self, ensure_dump_dir):
         from tests.shared_networks import TransposeModule_T2d_Net
 
@@ -593,7 +593,7 @@ def test_Transpose2d_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
-    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"))
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     @pytest.mark.parametrize(
         "shape, axes",
         [
@@ -635,7 +635,7 @@ def test_Transpose3d(self, shape, axes):
             expected = inpa[i - 2].transpose(axes).ravel()
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
 
-    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"))
+    @pytest.mark.skipif(hasattr(pb.Transpose2d, "__deprecated__"), reason="deprecated")
     def test_Transpose3d_mapping(self, ensure_dump_dir):
         from tests.shared_networks import TransposeModule_T3d_Net
 
diff --git a/tests/simulator/test_encoder.py b/tests/simulator/test_encoder.py
index 4821d262..259e8ccb 100644
--- a/tests/simulator/test_encoder.py
+++ b/tests/simulator/test_encoder.py
@@ -36,10 +36,9 @@ def test_LatencyEncoder(self):
             out_spike2[t] = le2(x)
         assert 1
 
-    def test_PoissonEncoder(self):
+    def test_PoissonEncoder(self, random_fixture):
         seed = 1
-        rng = np.random.RandomState(seed=seed)
-        x = rng.rand(10, 10).astype(np.float32)
+        x = np.random.rand(10, 10).astype(np.float32)
         pe = pb.simulator.PoissonEncoder(seed=seed)
         out_spike = np.full((20, 10, 10), 0)
         for t in range(20):

From b0bf42604a6ed0361daf09fc207d4e51f802910b Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:37:26 +0800
Subject: [PATCH 024/187] =?UTF-8?q?=F0=9F=8E=A8=20update=20references=20ba?=
 =?UTF-8?q?sed=20on=20paicorelib=20~1.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_template.py | 20 +++++---------------
 paibox/backend/placement.py     | 21 ++++++---------------
 paibox/backend/types.py         |  4 ++--
 3 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index f58a5647..c7482275 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -22,6 +22,7 @@
 )
 from paicorelib import ReplicationId as RId
 from paicorelib import (
+    NeuronConf,
     RoutingCoord,
     SNNModeEnable,
     SpikeWidthFormat,
@@ -29,7 +30,7 @@
     get_replication_id,
 )
 from paicorelib.framelib import types as flib_types
-from paicorelib.framelib.frame_gen import OfflineFrameGen
+from paicorelib.framelib import OfflineFrameGen
 from paicorelib.framelib.utils import _mask, np2bin, np2npy, np2txt
 
 if sys.version_info >= (3, 10):
@@ -175,16 +176,6 @@ class OutputNeuronDest(NamedTuple):
     end: AxonCoord
 
 
-try:
-    from paicorelib.ram_model import NeuronConf as _NeuronConf
-except ImportError:
-    from pydantic import BaseModel
-
-    class _NeuronConf(BaseModel):
-        attrs: NeuronAttrs
-        dest_info: NeuronDestInfo
-
-
 class NeuronConfig(NamedTuple):
     _extra_params = (
         "n_neuron",
@@ -239,8 +230,8 @@ def encapsulate(
             neu_seg.n_neuron, neu_seg.addr_ram, neu_seg.offset, attrs, neuron_dest_info
         )
 
-    def export(self) -> _NeuronConf:
-        return _NeuronConf(attrs=self.neuron_attrs, dest_info=self.neuron_dest_info)
+    def export(self) -> NeuronConf:
+        return NeuronConf(attrs=self.neuron_attrs, dest_info=self.neuron_dest_info)
 
     def to_json(self) -> Union[str, bytes]:
         """Dump the configs into json for debugging."""
@@ -393,8 +384,7 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                         _n_neuron_nram,
                         neu_conf.neuron_attrs,
                         neu_conf.neuron_dest_info,
-                        lcn_ex=v.params_reg.lcn_extension,
-                        weight_precision=v.params_reg.weight_precision,
+                        v.params_reg.n_repeat_nram,
                     )
                 )
 
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 793090a2..d0d59af8 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -393,8 +393,10 @@ class CorePlacement(CoreAbstract):
     neu_segs_of_cplm: NeuSegOfCorePlm
     neu_configs: dict[Neuron, NeuronConfig]
 
-    # FIXME Change to HwConfig.ADDR_AXON_MAX(1152) once it is fixed.
-    WRAM_BASE_SHAPE: ClassVar[tuple[int, int]] = (1152, HwConfig.ADDR_RAM_MAX)
+    WRAM_BASE_SHAPE: ClassVar[tuple[int, int]] = (
+        HwConfig.ADDR_AXON_MAX + 1,
+        HwConfig.ADDR_RAM_MAX + 1,
+    )
 
     def __init__(
         self,
@@ -797,19 +799,8 @@ def max_lcn_of_cb(cb: list[CoreBlock]) -> LCN_EX:
     return max(cb, key=lambda cb: cb.lcn_ex).lcn_ex
 
 
+# Get the fan-out by the combination rate of dendrites
 if hasattr(HwConfig, "FANOUT_IW8"):
     FANOUT_IW8 = HwConfig.FANOUT_IW8  # type: ignore
 else:
-    # Get the fan-out by the combination rate of dendrites
-    FANOUT_IW8: list[int] = [
-        HwConfig.N_NEURON_MAX_ANN,
-        1364,
-        876,
-        512,
-        256,
-        128,
-        64,
-        32,
-        16,
-        8,
-    ]
+    FANOUT_IW8 = [HwConfig.N_NEURON_MAX_ANN, 1364, 876, 512, 256, 128, 64, 32, 16, 8]
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 3eaf594e..bed63c20 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -125,9 +125,9 @@ def addr_ram(self) -> list[int]:
     def addr_max(self) -> int:
         if (
             _addr_max := self.offset + self.repeat * self.n_neuron
-        ) > HwConfig.ADDR_RAM_MAX:
+        ) > HwConfig.ADDR_RAM_MAX + 1:
             raise ValueError(
-                f"neuron RAM address out of range {HwConfig.ADDR_RAM_MAX} ({_addr_max})."
+                f"neuron RAM address out of range {HwConfig.ADDR_RAM_MAX + 1} ({_addr_max})."
             )
 
         return _addr_max

From 10a5090bb4c9c4ebeba55e042bf19da73928a924 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 19 Jul 2024 10:45:50 +0800
Subject: [PATCH 025/187] =?UTF-8?q?=F0=9F=93=9D=20add=20the=20description?=
 =?UTF-8?q?=20of=20the=20ANN=20parameters?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/Guide-of-PAIBox.md | 54 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/docs/Guide-of-PAIBox.md b/docs/Guide-of-PAIBox.md
index 70826bab..1301133d 100644
--- a/docs/Guide-of-PAIBox.md
+++ b/docs/Guide-of-PAIBox.md
@@ -1,3 +1,13 @@
+<style>
+.center 
+{
+    width: auto;
+    display: table;
+    margin-left: auto;
+    margin-right: auto;
+}
+</style>
+
 <div align="center">
 # PAIBox使用指南
 
@@ -9,7 +19,7 @@
 python = "^3.9"
 pydantic = "^2.0.3"
 numpy = "^1.26.0"
-paicorelib = "^1.1.6"
+paicorelib = "~1.3"
 ```
 
 可选依赖：
@@ -67,11 +77,49 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 - `delay`：设定神经元输出的延迟。默认为1，即本时间步的计算结果，**下一时间步**传递至后继节点。
 - `tick_wait_start`：设定神经元启动时间。神经元将在第 `T` 个时间步时启动。0表示不启动。默认为1。
 - `tick_wait_end`：设定神经元持续工作时长。神经元将持续工作 `T` 个时间步。0表示**持续工作**。默认为0。
-- `unrolling_factor`：该参数与后端流程相关。展开因子表示神经元将被展开，部署至更多的物理核上，以降低延迟并提高吞吐率。
+- `unrolling_factor`：展开因子表示神经元将被展开，部署至更多的物理核上，以降低延迟并提高吞吐率。该参数仅与后端流程相关。默认为1。
 - `overflow_strict`：溢出严格模式。用于设置是否严格检查运算过程中神经元膜电位出现溢出的情况。若启用，遇到溢出将报错，否则将遵循硬件行为进行处理。默认为 `False`。
 - `keep_shape`：是否在仿真记录数据时保持尺寸信息，默认为 `True`。实际进行运算的尺寸仍视为一维。
 - `name`：神经元的名称。可选参数。
 
+神经元的部分行为由芯片计算核的某些配置项决定：输入数据位数、输出数据位数、SNN使能。芯片计算核的工作模式即由这些参数决定。例如，SNN模式则是输入数据、输出数据位数均为1bit，SNN使能为1。对应关系如下表所列：
+
+<p align="center">计算核配置项与工作模式对应表</p>
+<div class="center">
+
+|           模式           | `input_width` | `spike_width` | `snn_en` |
+| :-----------------------: | :-------------: | :-------------: | :--------: |
+|           BANN           |        0        |        0        |     0     |
+|            SNN            |        0        |        0        |     1     |
+|       BANN/SNN to ANN       |        0        |        1        |     0     |
+| BANN/SNN to SNN with values |        0        |        1        |     1     |
+|       ANN to BANN/SNN       |        1        |        0        |     0     |
+|           BANN           |        1        |        1        |     0     |
+|         Undefined         |        1        |       0/1       |     1     |
+
+</div>
+
+- `input_width`：处理核输入数据位数，1或8。为1表示该处理核的输入数据为脉冲，反之为 8bit 无符号数。默认为1。
+- `spike_width`：神经元输出数据位数，1或8。为1表示该处理核输出数据（从神经元输出）为脉冲，反之为 8bit 无符号数。默认为1。
+- `snn_en`：SNN 模式使能。当开启时，神经元内的计算保留上一时刻膜电平信息，反之不保留（ANN 计算模式不需要上一时刻膜电平信息）。默认为 `True`。
+- `bit_truncation`：神经元输出的 8bit 无符号数的截断位置。默认为8，该参数仅在 `spike_width=8` 时生效。由于膜电平为 30bit 有符号数，因此需要截取 8bit 作为神经元最终的输出。若膜电平最高有效位大于所截取的位置，则输出255。该截断操作类似于有上限的斜率可调的 Relu 操作。`bit_truncation` 与截取位置的对应关系如下表所列：
+
+<p align="center">截取位置对应表</p>
+<div class="center">
+
+| `bit_truncation` |   截取位置   |
+| :----------------: | :-----------: |
+|         0         |     8'h0     |
+|         1         |  {[0], 7'h0}  |
+|         2         | {[1:0], 6'h0} |
+|        ……        |     ……     |
+|         8         |     [7:0]     |
+|         9         |     [8:1]     |
+|        ……        |     ……     |
+|         29         |    [28:21]    |
+
+</div>
+
 #### LIF
 
 LIF 神经元实现了“泄露-积分-发射”神经元模型，其调用方式及参数如下：
@@ -912,7 +960,7 @@ mapper.clear()
 
 - `input`：输入节点信息字典。
 - `output`：输出目的地信息字典。
-- `memebers`：中间层所在物理核的配置项字典。
+- `members`：中间层所在物理核的配置项字典。
 - `inherent_timestep`：网络的最长时间步。
 - `n_core_required`：网络**需要**的物理核数目。
 - `n_core_occupied`：网络**实际占用**的物理核数目。

From 68fda25ab22901cc87f6fecac8374a7d00ece5da Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 22 Jul 2024 10:52:39 +0800
Subject: [PATCH 026/187] =?UTF-8?q?=F0=9F=94=96=20v1.2.0a1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md   | 12 ++++++++----
 poetry.lock    |  4 ++--
 pyproject.toml |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c766b1b..bf8d663e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,7 +21,7 @@
 
 ## v1.0.0a6
 
-- 新增 `Always1Neuron` 神经元，该神经元将在工作期间持续输出1，不得单独存在，需存在前向突触与其连接。
+- 新增 `Always1Neuron` 神经元，该神经元将在工作期间持续输出1，不得单独存在，需存在前向突触与其连接
 
 ## v1.0.0a7
 
@@ -58,16 +58,20 @@
   2. 负阈值，默认为硬件允许的最小负整数
   3. LIF 支持设置偏置，偏置可为数组形式
   4. LIF 支持同时设置泄露与偏置，将叠加处理
-
 - 支持神经元的随机突触整合、随机阈值、随机泄露配置的设置，但不支持仿真
 - 支持多芯片部署
-- 重构路由算法，现在的算法不会出现路由死锁
+- 重构路由算法，现在路由不会出现死锁
 - 行为变更：
 
   1. 子网络现在直接在主网络内部 `self.subnet=...` 例化即可
   2. 编译选项现在直接通过 `paibox.Mapper.compile(...)` 传入，默认配置不变
-  3. 在 `paibox.Mapper.export()` 中使用 `split_by_chip` 指定配置帧文件是否以芯片分割，默认不分割。原 `split_by_coord` 弃用。
+  3. 在 `paibox.Mapper.export()` 中使用 `split_by_chip` 指定配置帧文件是否以芯片分割，默认不分割。原 `split_by_coord` 弃用
 
 ## v1.1.1
 
 - 修复对权重RAM错误的配置
+
+## v1.2.0a1
+
+- 提高 `paicorelib` 依赖版本至 `~1.3`
+- 支持 ANN 网络的构建与部署
diff --git a/poetry.lock b/poetry.lock
index 50ea8f2f..4645ad5b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -200,7 +200,7 @@ reference = "tsinghua"
 
 [[package]]
 name = "paicorelib"
-version = "1.3.0a1"
+version = "1.3.0"
 description = "Library of PAICORE 2.0"
 optional = false
 python-versions = "^3.9"
@@ -215,7 +215,7 @@ pydantic = "^2.0.3"
 type = "git"
 url = "https://github.com/PAICookers/PAIlib.git"
 reference = "dev"
-resolved_reference = "041f4451c01c1d6710c51eece9fa3e98edffdac4"
+resolved_reference = "5cedc5fb1f66bc21e1c442a87bc804517a6555c2"
 
 [[package]]
 name = "pluggy"
diff --git a/pyproject.toml b/pyproject.toml
index adad889e..75cc1f5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "paibox"
-version = "1.1.1"
+version = "1.2.0a1"
 description = "Toolchain of PAICORE 2.0"
 authors = ["Ziru Pan <zrpan@stu.pku.edu.cn>"]
 maintainers = [

From 36210ac7d14e489745a7c56d7a9cbf0a6f465a9c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 22 Jul 2024 02:53:08 +0000
Subject: [PATCH 027/187] :rotating_light: auto fix by pre-commit hooks

---
 CHANGELOG.md                             |  1 +
 docs/Guide-of-PAIBox.md                  | 40 ++++++++++++------------
 paibox/backend/conf_template.py          |  8 ++---
 paibox/backend/placement.py              | 12 +++----
 paibox/backend/segment_utils.py          |  4 +--
 paibox/components/modules.py             |  4 +--
 paibox/components/neuron/base.py         |  8 ++---
 paibox/components/neuron/utils.py        |  5 ++-
 paibox/components/synapses/transforms.py |  2 +-
 tests/backend/test_mapper.py             |  3 +-
 tests/backend/test_placement.py          |  4 +--
 tests/components/neuron/test_neurons.py  |  4 +--
 tests/shared_networks.py                 |  1 +
 tests/test_utils.py                      |  2 +-
 14 files changed, 50 insertions(+), 48 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf8d663e..8274561c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,6 +58,7 @@
   2. 负阈值，默认为硬件允许的最小负整数
   3. LIF 支持设置偏置，偏置可为数组形式
   4. LIF 支持同时设置泄露与偏置，将叠加处理
+
 - 支持神经元的随机突触整合、随机阈值、随机泄露配置的设置，但不支持仿真
 - 支持多芯片部署
 - 重构路由算法，现在路由不会出现死锁
diff --git a/docs/Guide-of-PAIBox.md b/docs/Guide-of-PAIBox.md
index 1301133d..94bd1039 100644
--- a/docs/Guide-of-PAIBox.md
+++ b/docs/Guide-of-PAIBox.md
@@ -1,5 +1,5 @@
 <style>
-.center 
+.center
 {
     width: auto;
     display: table;
@@ -87,15 +87,15 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 <p align="center">计算核配置项与工作模式对应表</p>
 <div class="center">
 
-|           模式           | `input_width` | `spike_width` | `snn_en` |
-| :-----------------------: | :-------------: | :-------------: | :--------: |
-|           BANN           |        0        |        0        |     0     |
-|            SNN            |        0        |        0        |     1     |
-|       BANN/SNN to ANN       |        0        |        1        |     0     |
-| BANN/SNN to SNN with values |        0        |        1        |     1     |
-|       ANN to BANN/SNN       |        1        |        0        |     0     |
-|           BANN           |        1        |        1        |     0     |
-|         Undefined         |        1        |       0/1       |     1     |
+|            模式             | `input_width` | `spike_width` | `snn_en` |
+| :-------------------------: | :-----------: | :-----------: | :------: |
+|            BANN             |       0       |       0       |    0     |
+|             SNN             |       0       |       0       |    1     |
+|       BANN/SNN to ANN       |       0       |       1       |    0     |
+| BANN/SNN to SNN with values |       0       |       1       |    1     |
+|       ANN to BANN/SNN       |       1       |       0       |    0     |
+|             ANN             |       1       |       1       |    0     |
+|          Undefined          |       1       |      0/1      |    1     |
 
 </div>
 
@@ -107,16 +107,16 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 <p align="center">截取位置对应表</p>
 <div class="center">
 
-| `bit_truncation` |   截取位置   |
-| :----------------: | :-----------: |
-|         0         |     8'h0     |
-|         1         |  {[0], 7'h0}  |
-|         2         | {[1:0], 6'h0} |
-|        ……        |     ……     |
-|         8         |     [7:0]     |
-|         9         |     [8:1]     |
-|        ……        |     ……     |
-|         29         |    [28:21]    |
+| `bit_truncation` |   截取位置    |
+| :--------------: | :-----------: |
+|        0         |     8'h0      |
+|        1         |  {[0], 7'h0}  |
+|        2         | {[1:0], 6'h0} |
+|        ……        |      ……       |
+|        8         |     [7:0]     |
+|        9         |     [8:1]     |
+|        ……        |      ……       |
+|        29        |    [28:21]    |
 
 </div>
 
diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index c7482275..fded7c47 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -1,6 +1,6 @@
-from collections.abc import Sequence
 import sys
 from collections import defaultdict
+from collections.abc import Sequence
 from dataclasses import asdict, dataclass
 from enum import Enum
 from pathlib import Path
@@ -17,20 +17,20 @@
     InputWidthFormat,
     MaxPoolingEnable,
     NeuronAttrs,
+    NeuronConf,
     NeuronDestInfo,
     ParamsReg,
 )
 from paicorelib import ReplicationId as RId
 from paicorelib import (
-    NeuronConf,
     RoutingCoord,
     SNNModeEnable,
     SpikeWidthFormat,
     WeightWidth,
     get_replication_id,
 )
-from paicorelib.framelib import types as flib_types
 from paicorelib.framelib import OfflineFrameGen
+from paicorelib.framelib import types as flib_types
 from paicorelib.framelib.utils import _mask, np2bin, np2npy, np2txt
 
 if sys.version_info >= (3, 10):
@@ -42,7 +42,7 @@
 from paibox.utils import reverse_8bit
 
 from .context import _BACKEND_CONTEXT
-from .types import WRAMPackedType, AxonCoord, NeuSegment, NodeName
+from .types import AxonCoord, NeuSegment, NodeName, WRAMPackedType
 
 try:
     import orjson
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index d0d59af8..e3f54daa 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -13,7 +13,7 @@
     ResourceError,
     TruncationWarning,
 )
-from paibox.types import WeightType, WEIGHT_DTYPE
+from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
 
 from .conf_template import CoreConfig, CoreConfInChip, CorePlmConfig, NeuronConfig
@@ -474,19 +474,19 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
 
     def _weight_ram_mapping(self) -> WRAMPackedType:
         """Map the raw weights to the weight RAM(WRAM). The mapping is different for both input widths.
-        
+
         NOTE: When the input width is 8 bits, no neurons need to be mapped to the WRAM when the combination rate of \
-            dentrites >= 8, while some neurons need to be mapped to the WRAM when < 8. 
-            
+            dentrites >= 8, while some neurons need to be mapped to the WRAM when < 8.
+
             When the input width is 8 bits and with the combination rate of dentrites > 3, the mapping of weights   \
             becomes the key to limiting neuron capacity. In this case, if the weight accuracy is less than 8 bits   \
             (which may also occur when the weight accuracy is optimized), the weight cannot be folded directly in   \
             the fan-in expansion direction, otherwise the column of the WRAM will exceed the upper limit(512).      \
-            
+
             A portion of the fan-in needs to be expanded to an unfilled portion in the direction of the weight      \
             accuracy. At this point, n_fold=n_timeslot/(8/n_weight_bits)=2^(dendrite_comb_rate - 3). For example,   \
             for LCN_8X & WW8, the n_fold is 3. For LCN_32X & WW4, the n_fold is 4 (instead of 5).
-            
+
         TODO Now, in ANN mode, only the mapping of 8-bit weights is supported. The weight accuracy optimization is  \
             supposed to disable manually for now.
         """
diff --git a/paibox/backend/segment_utils.py b/paibox/backend/segment_utils.py
index e75bd8c5..d5b78d2c 100644
--- a/paibox/backend/segment_utils.py
+++ b/paibox/backend/segment_utils.py
@@ -307,7 +307,7 @@ def aligned_coords(
         | ------- AxonSeg[0] ------- | ------- AxonSeg[1] ------- | ...
     tr=0 A1[0]   A1[1]   ...  A1[99]   A2[0]   A2[1]   ... A2[199]
     tr=1 A1[100] A1[101] ... A1[199]   A2[200] A2[201] ... A2[399]
-    
+
     The target axon may be Ax[100:499], where (tr=0, offset+100) is the start and (tr=2, offset+499)\
         is the end.
             offset
@@ -316,7 +316,7 @@ def aligned_coords(
     tr=0  ...   Ax[0]   Ax[1]   ... Ax[199]
     tr=1  ...   Ax[200] Ax[201] ... Ax[399]
     tr=2  ...   Ax[400] Ax[401] ... Ax[599]
-    
+
     When the input width is 8 bits, each A[x] occupies 8 bits. The interval of axons is 8.
     """
     addr_width = axon_seg.addr_width
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 6d879593..884f7733 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -7,11 +7,11 @@
 
 import numpy as np
 from paicorelib import (
-    InputWidthFormat,
-    SpikeWidthFormat,
     TM,
     HwConfig,
+    InputWidthFormat,
     SNNModeEnable,
+    SpikeWidthFormat,
     get_core_mode,
 )
 
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 67f87509..2180b13a 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -11,11 +11,11 @@
     RM,
     SIM,
     TM,
+    CoreMode,
     HwConfig,
     InputWidthFormat,
-    SpikeWidthFormat,
     SNNModeEnable,
-    CoreMode,
+    SpikeWidthFormat,
     get_core_mode,
 )
 
@@ -40,11 +40,11 @@
 from .utils import (
     BIT_TRUNCATE_MAX,
     NEG_THRES_MIN,
+    _input_width_format,
     _leak_v_check,
     _mask,
-    vjt_overflow,
-    _input_width_format,
     _spike_width_format,
+    vjt_overflow,
 )
 
 __all__ = ["Neuron"]
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index 055c03f8..479a22c8 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -15,14 +15,13 @@
 
 from paibox.exceptions import FunctionalError, PAIBoxWarning
 from paibox.types import (
-    LeakVType,
     NEUOUT_U8_DTYPE,
     SPIKE_DTYPE,
-    VoltageType,
     VOLTAGE_DTYPE,
+    LeakVType,
+    VoltageType,
 )
 
-
 NEG_THRES_MIN = -NEG_THRES_UNSIGNED_MAX
 
 
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 9ae205b6..205cd096 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -7,13 +7,13 @@
 
 from paibox.exceptions import AutoOptimizationWarning, ShapeError
 from paibox.types import (
+    VOLTAGE_DTYPE,
     WEIGHT_DTYPE,
     DataArrayType,
     IntScalarType,
     NeuOutType,
     SynOutType,
     WeightType,
-    VOLTAGE_DTYPE,
 )
 from paibox.utils import is_shape, shape2num, typical_round
 
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 516e89b6..73ec6dfe 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -2,7 +2,8 @@
 
 import numpy as np
 import pytest
-from paicorelib import Coord, HwConfig, WeightWidth as WW
+from paicorelib import Coord, HwConfig
+from paicorelib import WeightWidth as WW
 
 import paibox as pb
 from paibox.base import SynSys
diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 7be18952..755f209c 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -1,11 +1,11 @@
 import numpy as np
 import pytest
-from paicorelib import HwConfig, LCN_EX
+from paicorelib import LCN_EX, HwConfig
 from paicorelib import WeightWidth as WW
 
 import paibox as pb
 from paibox.backend.placement import CorePlacement
-from paibox.backend.types import NeuSegment, WRAMUnpackedType, WRAM_PACKED_DTYPE
+from paibox.backend.types import WRAM_PACKED_DTYPE, NeuSegment, WRAMUnpackedType
 from paibox.exceptions import ResourceError
 from paibox.types import WEIGHT_DTYPE, WeightType
 
diff --git a/tests/components/neuron/test_neurons.py b/tests/components/neuron/test_neurons.py
index 5db42544..1fda7974 100644
--- a/tests/components/neuron/test_neurons.py
+++ b/tests/components/neuron/test_neurons.py
@@ -3,9 +3,9 @@
 from typing import Any, Literal
 
 import numpy as np
-from numpy.typing import NDArray
 import pytest
-from paicorelib import CoreMode, LCM, LDM, LIM, NTM, RM, SIM, TM, NeuronAttrs
+from numpy.typing import NDArray
+from paicorelib import LCM, LDM, LIM, NTM, RM, SIM, TM, CoreMode, NeuronAttrs
 
 import paibox as pb
 from paibox.components import Neuron
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 876a95a3..7a0b52b7 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -1,4 +1,5 @@
 from typing import Literal
+
 import numpy as np
 import pytest
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c482a099..ef279954 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,6 @@
 import pytest
 
-from paibox.utils import reverse_8bit, reverse_16bit, fn_sgn, typical_round
+from paibox.utils import fn_sgn, reverse_8bit, reverse_16bit, typical_round
 
 
 @pytest.mark.parametrize("a,b, expected", [(1, 0, 1), (1, 2, -1), (3, 3, 0)])

From a8512e3c97325b90ca1474e99f3e24f57e4c6496 Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Thu, 6 Jun 2024 14:29:26 +0800
Subject: [PATCH 028/187] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=BB=B6=E8=BF=9F?=
 =?UTF-8?q?=E5=85=A8=E8=BF=9E=E6=8E=A5=E5=B1=82=EF=BC=8C=E5=8D=8A=E6=8A=98?=
 =?UTF-8?q?=E5=8F=A0=E5=B1=82=EF=BC=8C=E8=BF=87=E6=BB=A4=E5=B1=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

延迟全连接层需在半折叠卷积与全连接层连接时使用，过滤层需要在整个半折叠网络最后的全连接层后使用
---
 .gitignore                               |   2 +
 hzy/hzy_test.py                          | 427 +++++++++++++++++++++++
 paibox/__init__.py                       |   3 +
 paibox/components/functional.py          | 310 ++++++++++++++--
 paibox/components/neuron/base.py         |  23 +-
 paibox/components/neuron/neurons.py      |   2 +-
 paibox/components/synapses/__init__.py   |   2 +-
 paibox/components/synapses/base.py       |  37 ++
 paibox/components/synapses/conv_utils.py |  22 ++
 paibox/components/synapses/synapses.py   |  77 ++--
 paibox/components/synapses/transforms.py |  33 ++
 paibox/network.py                        |  17 +-
 12 files changed, 886 insertions(+), 69 deletions(-)
 create mode 100644 hzy/hzy_test.py

diff --git a/.gitignore b/.gitignore
index d53852fe..c2fac1b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,3 +155,5 @@ cython_debug/
 .vscode
 debug
 *.drawio
+
+.idea/
diff --git a/hzy/hzy_test.py b/hzy/hzy_test.py
new file mode 100644
index 00000000..7063cfc4
--- /dev/null
+++ b/hzy/hzy_test.py
@@ -0,0 +1,427 @@
+
+
+import numpy as np
+import paibox as pb
+from paibox.components.functional import Conv_HalfRoll, Filter
+from paibox.components.synapses import Conv2dHalfRollSyn
+from paibox.components.synapses.conv_utils import _conv2d_halfroll
+from paibox.simulator.utils import _conv2d_faster_fp32
+
+
+class fcnet_2layer_dual_port(pb.Network):
+    def __init__(self, weight1, Vthr1, weight2, Vthr2):
+        super().__init__()
+
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(5,))
+        self.i2 = pb.InputProj(input=pe,shape_out=(5,))
+        self.n1 = pb.IF(10, threshold=Vthr1, reset_v=0, name="delay_1")
+        self.s1 = pb.FullConn(
+            self.i1,
+            self.n1,
+            weights=weight1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.n2 = pb.IF(
+            5, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="delay_2"
+        )
+        self.s2 = pb.FullConn(
+            self.i2,
+            self.n2,
+            weights=weight1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.n3 = pb.IF(
+            20, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="IF_1"
+        )
+        self.s3 = pb.FullConn(
+            self.n1,
+            self.n3,
+            weights=weight1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s4 = pb.FullConn(
+            self.n2,
+            self.n3,
+            weights=weight1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        # self.n3 = pb.IF(
+        #     20, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="IF_2"
+        # )
+        # self.s4 = pb.FullConn(
+        #     self.n2,
+        #     self.n3,
+        #     weights=weight1,
+        #     conn_type=pb.SynConnType.All2All,
+        # )
+
+        # tick_wait_start = 2 for second layer
+
+        # self.n3 = pb.IF(
+        #     5, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="batch_dual_port_o2"
+        # )
+        # self.s3 = pb.FullConn(
+        #     self.n1,
+        #     self.n2,
+        #     weights=weight2,
+        #     conn_type=pb.SynConnType.All2All,
+        # )
+        # self.s4 = pb.FullConn(
+        #     self.n1,
+        #     self.n3,
+        #     weights=weight2,
+        #     conn_type=pb.SynConnType.All2All,
+        # )
+        #
+        # self.probe1 = pb.Probe(target=self.n2, attr="spike")
+        # self.probe2 = pb.Probe(target=self.n3, attr="spike")
+
+class fcnet_3(pb.Network):
+    def __init__(self):
+        super().__init__()
+
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(2, 5, 5))
+        self.n1 = pb.IF((1, 7), threshold=1, reset_v=0, name="n_1")
+        self.n2 = pb.IF((1, 5, 5), threshold=1, reset_v=0, name="n_2")
+        self.n3 = pb.IF((1, 5, 5), threshold=1, reset_v=0, name="n_3")
+        self.n4 = pb.IF((1, 5), threshold=1, reset_v=0, name="n_4")
+        self.n5 = pb.IF((1, 3), threshold=1, reset_v=0, name="n_5")
+        self.n6 = pb.IF((1, 3), threshold=1, reset_v=0, name="n_6")
+        self.n7 = pb.IF((1, 3), threshold=1, reset_v=0, name="n_7")
+        self.s0 = pb.FullConn(
+            self.i1,
+            self.n1,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s1 = pb.FullConn(
+            self.n1,
+            self.n2,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s2 = pb.FullConn(
+            self.n2,
+            self.n3,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s3 = pb.FullConn(
+            self.n1,
+            self.n3,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s4 = pb.FullConn(
+            self.n3,
+            self.n4,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s5 = pb.FullConn(
+            self.n4,
+            self.n5,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s6 = pb.FullConn(
+            self.n3,
+            self.n5,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s7 = pb.FullConn(
+            self.n5,
+            self.n6,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s8 = pb.FullConn(
+            self.n6,
+            self.n7,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.s9 = pb.FullConn(
+            self.n5,
+            self.n7,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+weight1 = np.random.randint(0, 10, size=(32, 1, 5, 5), dtype=np.int8)
+weight2 = np.random.randint(0, 10, size=(32, 32, 2, 2), dtype=np.int8)
+weight3 = np.random.randint(0, 10, size=(64, 32, 5, 5), dtype=np.int8)
+weight4 = np.random.randint(0, 10, size=(64, 64, 2, 2), dtype=np.int8)
+
+class Conv2d_Net(pb.Network):
+    def __init__(self, Vthr1, Vthr2, Vthr3):
+        super().__init__()
+
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(1, 28, 28))
+        self.n1 = pb.IF((32, 24, 24), threshold=Vthr1, reset_v=0)
+        self.conv2d_1 = pb.Conv2d(self.i1, self.n1, kernel=weight1, stride=1)
+
+        self.n2 = pb.IF((32, 12, 12), threshold=Vthr2, reset_v=0, tick_wait_start=2)
+        self.conv2d_2 = pb.Conv2d(self.n1, self.n2, kernel=weight2, stride=2)
+
+        self.n3 = pb.IF((64, 8, 8), threshold=Vthr3, reset_v=0, tick_wait_start=3)
+        self.conv2d_3 = pb.Conv2d(self.n2, self.n3, kernel=weight3, stride=1)
+        self.n4 = pb.IF((64, 4, 4), threshold=Vthr3, reset_v=0, tick_wait_start=4)
+        self.conv2d_4 = pb.Conv2d(self.n3, self.n4, kernel=weight4, stride=2)
+        self.n5 = pb.IF((256,), threshold=Vthr3, reset_v=0, tick_wait_start=5)
+        self.fc1 = pb.FullConn(
+            self.n4, self.n5, weights=np.random.randint(0, 10, size=(1024, 256), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All
+        )
+        self.n6 = pb.IF((64,), threshold=Vthr3, reset_v=0, tick_wait_start=6)
+        self.fc2 = pb.FullConn(
+            self.n5, self.n6, weights=np.random.randint(0, 10, size=(256, 64), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All
+        )
+        self.n7 = pb.IF((10,), threshold=Vthr3, reset_v=0, tick_wait_start=7)
+        self.fc3 = pb.FullConn(
+            self.n6, self.n7, weights=np.random.randint(0, 10, size=(64, 10), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All
+        )
+
+        self.probe1 = pb.Probe(self.n3, "spike")
+
+
+
+input_data2 = np.array([1,0,1,0,1], dtype=np.bool_)
+class fcnet_4(pb.DynSysGroup):
+    """Scratch net: four half-roll convs, a delayed FC, two FCs and a final filter."""
+    def __init__(self):
+        super().__init__()
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(1, 28, 28))
+        self.n1 = pb.IF((1, 28), threshold=4, reset_v=0, name="n_1")
+        self.s0 = pb.FullConn(
+            self.i1,
+            self.n1,
+            weights=1,
+            conn_type=pb.SynConnType.All2All,
+        )
+        # NOTE(review): the ConvHalfRoll calls below pass a destination neuron as the 2nd argument,
+        # while Conv_HalfRoll.__init__ takes the kernel there -- confirm before instantiating.
+        self.n2 = pb.IF((32, 24, 24), threshold=0, reset_v=0, name="n_2")
+        self.conv1 = pb.ConvHalfRoll(self.n1, self.n2, weight1, 1)
+        self.n3 = pb.IF((32, 12, 12), threshold=1, reset_v=0, name="n_3")
+        self.conv2 = pb.ConvHalfRoll(self.n2, self.n3, weight2, 2)
+        self.n4 = pb.IF((64, 8, 8), threshold=1, reset_v=0, name="n_4")
+        self.conv3 = pb.ConvHalfRoll(self.n3, self.n4, weight3, 1)
+        self.n5 = pb.IF((64, 4, 4), threshold=1, reset_v=0, name="n_5")
+        self.conv4 = pb.ConvHalfRoll(self.n4, self.n5, weight4, 2)
+        self.n6 = pb.IF((256,), threshold=1, reset_v=0, name="n_6")
+        self.linear1 = pb.DelayFullConn(
+            self.n5,
+            self.n6,
+            delay=4,
+            weights=np.random.randint(0, 10, size=(1024, 256), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.n7 = pb.IF((64,), threshold=1, reset_v=0, name="n_7")
+        self.linear2 = pb.FullConn(
+            self.n6,
+            self.n7,
+            weights=np.random.randint(0, 10, size=(256, 64), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.n8 = pb.IF((10,), threshold=1, reset_v=0, name="n_8")
+        self.linear3 = pb.FullConn(  # was a second `linear2`, which rebound the n6 -> n7 attribute
+            self.n7,
+            self.n8,
+            weights=np.random.randint(0, 10, size=(64, 10), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.filter = pb.Filter(self.n8, 28)
+
+
+def out_bypass1(t, data1, *args, **kwargs):
+    return data1
+
+input_data1 = np.array([[1,2,5,7,5],
+                       [2,0,8,8,2],
+                       [3,8,5,7,5],
+                       [4,9,2,5,4],
+                       [5,10,2,3,8],
+                       [0,0,0,0,0],
+                       [0,0,0,0,0],
+                       [0,0,0,0,0],
+                       [0,0,0,0,0]], dtype=np.int8)
+class fcnet_5(pb.DynSysGroup):
+    def __init__(self):
+        super().__init__()
+        self.i1 = pb.InputProj(input=out_bypass1, shape_out=(1, 5))
+        self.conv1 = pb.ConvHalfRoll(self.i1, np.array([[[[2,1,2],[1,2,1],[1,2,3]]]], dtype=np.int8), 1, 1, tick_wait_start=1)
+        self.conv2 = pb.ConvHalfRoll(self.conv1, np.array([[[[-2,1,2],[1,-2,1],[1,2,-3]]]], dtype=np.int8),1,0, tick_wait_start=3)
+        # self.linear1 = pb.DelayFullConn(
+        #     self.n5,
+        #     self.n6,
+        #     delay=4,
+        #     weights=np.random.randint(0, 10, size=(1024, 256), dtype=np.int8),
+        #     conn_type=pb.SynConnType.All2All,
+        # )
+        # self.filter = pb.Filter(self.n8, 28)
+pb_net = fcnet_5()
+
+generated = pb.DynSysGroup.build_fmodule(pb_net)
+
+sim1 = pb.Simulator(pb_net, start_time_zero=False)
+
+for i in range(9):
+    pb.FRONTEND_ENV.save(data1=input_data1[i])
+    sim1.run(1)
+    #print(pb_net.nd_Conv_HalfRoll_0.output)
+    print(pb_net.nd_Conv_HalfRoll_1.output)
+
+output =_conv2d_faster_fp32(np.array([[[1,2,3,4,5],[2,0,8,9,10],[5,8,5,2,2],[7,8,7,5,3],[5,2,5,4,8]]]),
+                            np.array([[[[2,1,2],[1,2,1],[1,2,3]]]], dtype=np.int8),
+                            (1,1),
+                            (1,1))
+#print(output)
+output = _conv2d_faster_fp32(np.array([[[8,34,55,72,43],[43,56,64,71,48],[58,90,83,82,45],[59,82,76,73,37],[35,50,49,46,33]]]),
+                             np.array([[[[-2,1,2],[1,-2,1],[1,2,-3]]]], dtype=np.int8),(1,1),(0,0))
+print(output)
+
+
+
+class deeplabv2(pb.DynSysGroup):
+    def __init__(self):
+        super().__init__()
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(3, 256))
+        self.n1 = pb.LIF((64, 254), threshold=0, reset_v=0, name="n_1")
+        self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, np.random.randint(0, 10, size=(64,3,3,3), dtype=np.int8), 1)
+        self.n2 = pb.LIF((64, 252), threshold=0, reset_v=0, name="n_2")
+        self.conv2 = pb.ConvHalfRoll(self.n1, self.n2, np.random.randint(0, 10, size=(64,64,3,3), dtype=np.int8), 1)
+        self.n3 = pb.LIF((64, 127), threshold=0, reset_v=0, name="n_3")
+        self.maxpool2d1 = pb.ConvHalfRoll(self.n2, self.n3, np.random.randint(0, 1, size=(64, 64, 3, 3), dtype=np.bool_), 2)
+        self.n4 = pb.LIF((128, 125), threshold=0, reset_v=0, name="n_4")
+        self.conv3 = pb.ConvHalfRoll(self.n3, self.n4, np.random.randint(0, 10, size=(128,64,3,3), dtype=np.int8), 1)
+        self.n5 = pb.LIF((128, 123), threshold=0, reset_v=0, name="n_5")
+        self.conv4 = pb.ConvHalfRoll(self.n4, self.n5, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
+        self.n6 = pb.LIF((128, 62), threshold=0, reset_v=0, name="n_6")
+        self.maxpool2d2 = pb.ConvHalfRoll(self.n5, self.n6, np.random.randint(0, 1, size=(128, 128, 3, 3), dtype=np.bool_), 2)
+        self.n7 = pb.LIF((128, 60), threshold=0, reset_v=0, name="n_7")
+        self.conv5 = pb.ConvHalfRoll(self.n6, self.n7, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
+        self.n8 = pb.LIF((2, 58), threshold=0, reset_v=0, name="n_8")
+        self.conv6 = pb.ConvHalfRoll(self.n7, self.n8, np.random.randint(0, 10, size=(2, 128, 3, 3), dtype=np.int8), 1)
+        self.n9 = pb.IF((116,), threshold=1, reset_v=0, name="n_9")
+        self.linear2 = pb.FullConn(
+            self.n8,
+            self.n9,
+            weights=np.random.randint(0, 1, size=(116, 116), dtype=np.bool_),
+            conn_type=pb.SynConnType.All2All,
+        )
+# w = np.array(   [[
+#                 [[2, 2, 2],[5,5,5],[9,9,9]],
+#                 [[1, 1, 1],[4,4,4],[7,7,7]],
+#                 ]]
+# )
+w = np.random.randint(1, 10, size=(1,1,3,3), dtype=np.int8)
+class deeplabv3(pb.DynSysGroup):
+    def __init__(self):
+        super().__init__()
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(1, 10))
+        self.n1 = pb.LIF((1, 10), threshold=0, reset_v=0, name="n_1")
+        self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, w, 1)
+
+
+        # self.n2 = pb.LIF((64, 28), threshold=0, reset_v=0, name="n_2")
+        # self.conv2 = pb.ConvHalfRoll(self.n1, self.n2, np.random.randint(0, 10, size=(64,64,3,3), dtype=np.int8), 1)
+        # self.n3 = pb.LIF((100, 24), threshold=0, reset_v=0, name="n_3")
+        # self.maxpool2d1 = pb.ConvHalfRoll(self.n1, self.n3, np.random.randint(0, 1, size=(100, 100, 3, 3), dtype=np.bool_), 2)
+        # #
+        # self.n4 = pb.LIF((8, 22), threshold=0, reset_v=0, name="n_4")
+        # self.conv3 = pb.ConvHalfRoll(self.n3, self.n4, np.random.randint(0, 10, size=(8,100,3,3), dtype=np.int8), 1)
+        # # # self.n5 = pb.LIF((128, 10), threshold=0, reset_v=0, name="n_5")
+        # # # self.conv4 = pb.ConvHalfRoll(self.n4, self.n5, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
+        # # # self.n6 = pb.LIF((128, 5), threshold=0, reset_v=0, name="n_6")
+        # # # self.maxpool2d2 = pb.ConvHalfRoll(self.n5, self.n6, np.random.randint(0, 1, size=(128, 128, 3, 3), dtype=np.bool_), 2)
+        # # # self.n7 = pb.LIF((128, 3), threshold=0, reset_v=0, name="n_7")
+        # # # self.conv5 = pb.ConvHalfRoll(self.n6, self.n7, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
+        # self.n8 = pb.LIF((2, 251), threshold=0, reset_v=0, name="n_8")
+        # self.conv6 = pb.ConvHalfRoll(self.n4, self.n8, np.random.randint(0, 10, size=(2, 8, 3, 3), dtype=np.int8), 1)
+        # self.n9 = pb.IF((54,), threshold=1, reset_v=0, name="n_9")
+        # self.linear2 = pb.DelayFullConn(
+        #     self.n8,
+        #     self.n9,
+        #     delay=27,
+        #     weights=np.random.randint(0, 1, size=(2*27*27, 54), dtype=np.bool_),
+        #     conn_type=pb.SynConnType.All2All,
+        # )
+        # self.linear2 = pb.FullConn(
+        #     self.n8,
+        #     self.n9,
+        #     weights=np.random.randint(0, 1, size=(2 * 9, 10), dtype=np.bool_),
+        #     conn_type=pb.SynConnType.All2All,
+        # )
+class snn3(pb.DynSysGroup):
+    def __init__(self):
+        super().__init__()
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(1, 128))
+        self.n1 = pb.LIF((64, 128), threshold=0, reset_v=0, name="n_1")
+        self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, np.random.randint(0,10, size=(64,1,3,3), dtype=np.int8), 1)
+        self.n3 = pb.LIF((64, 64), threshold=0, reset_v=0, name="n_3")
+        self.maxpool2d1 = pb.ConvHalfRoll(self.n1, self.n3, np.random.randint(0, 1, size=(64, 64, 2, 2), dtype=np.bool_), 2)
+        self.n4 = pb.LIF((64, 64), threshold=0, reset_v=0, name="n_4")
+        self.conv2 = pb.ConvHalfRoll(self.n3, self.n4, np.random.randint(0, 10, size=(64, 64, 3, 3), dtype=np.int8), 1)
+        self.n5 = pb.LIF((64, 32), threshold=0, reset_v=0, name="n_5")
+        self.maxpool2d2 = pb.ConvHalfRoll(self.n4, self.n5,
+                                          np.random.randint(0, 1, size=(64, 64, 2, 2), dtype=np.bool_), 2)
+        self.n6 = pb.LIF((64, 4, 4), threshold=0, reset_v=0, name="n_6")
+
+        self.linear1 = pb.DelayFullConn(
+            self.n5,
+            self.n6,
+            delay=32,
+            weights=np.random.randint(0, 10, size=(64*32*32, 64*4*4), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All,
+        )
+        self.n7 = pb.LIF((10,), threshold=0, reset_v=0, name="n_7")
+
+        self.linear2 = pb.FullConn(
+            self.n6,
+            self.n7,
+            weights=np.random.randint(0, 10, size=(64 * 4 * 4, 10), dtype=np.int8),
+            conn_type=pb.SynConnType.All2All,
+        )
+kernel = np.array([[[[1, 2, 3],
+                    [4, 5, 6],
+                    [7, 8, 9]]]], dtype=np.int8)
+class paddingnet(pb.DynSysGroup):
+    def __init__(self):
+        super().__init__()
+        pe = pb.simulator.PoissonEncoder()
+        self.i1 = pb.InputProj(input=pe, shape_out=(1, 4))
+        self.n1 = pb.IF((1, 4, 4), threshold=0, reset_v=0, name="n_1")
+        self.conv = pb.ConvHalfRoll(self.i1, self.n1, kernel, stride=1, padding=1)
+
+#pb_net.conv.build(pb_net, 3)
+#
+# pb.BACKEND_CONFIG.target_chip_addr = [(0, 0), (0, 1)]
+# mapper = pb.Mapper()
+# mapper.build(pb_net)
+#
+# graph_info = mapper.compile()
+# print("Core required:", graph_info["n_core_required"])
+# print("Core occupied:", graph_info["n_core_occupied"])
+
+
+
+# #print(graph_info["members"])
+# for k, v in graph_info["members"].items():
+#     for c, coreplm in v.items():
+#         print(c)
+#         for k, v in coreplm.neuron_configs.items():
+#             print(k.name,v)
+#             for n,s in k.master_nodes.items():
+#                 print(s.name)
+#                 # print(s.connectivity)
+
diff --git a/paibox/__init__.py b/paibox/__init__.py
index 9b83ae73..6f076fda 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -14,6 +14,9 @@
 from .components.functional import SpikingSub as SpikingSub
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
+from .components.functional import Conv_HalfRoll as ConvHalfRoll
+from .components.functional import Filter as Filter
+from .components.functional import Delay_FullConn as DelayFullConn
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 764ce2c5..e230aae0 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1,7 +1,7 @@
 import sys
 from collections.abc import Sequence
 from functools import partial
-from typing import Literal, Optional, Union
+from typing import Literal, Optional, Union, ClassVar
 
 import numpy as np
 from paicorelib import NTM, RM, TM
@@ -39,7 +39,7 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, FullConnSyn
+from .synapses import ConnType, FullConnSyn, Conv2dHalfRollSyn
 from .synapses.conv_types import _Size2Type
 from .synapses.conv_utils import _fm_ndim2_check, _pair
 from .synapses.transforms import Conv2dForward, _Pool2dForward
@@ -62,6 +62,9 @@
     "SpikingSub",
     "Transpose2d",
     "Transpose3d",
+    "Conv_HalfRoll",
+    "Filter",
+    "Delay_FullConn"
 ]
 
 
@@ -791,14 +794,14 @@ class SpikingSub(FunctionalModule2to1WithV):
     pos_threshold: int = 1
 
     def __init__(
-        self,
-        neuron_a: Union[NeuDyn, InputProj],
-        neuron_b: Union[NeuDyn, InputProj],
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        overflow_strict: bool = False,
-        **kwargs,
+            self,
+            neuron_a: Union[NeuDyn, InputProj],
+            neuron_b: Union[NeuDyn, InputProj],
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            overflow_strict: bool = False,
+            **kwargs,
     ) -> None:
         """Spiking subtraction module. The result will be reflected in time dimension.
 
@@ -865,12 +868,12 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 @set_rt_mode(1, 1, 1)
 class Transpose2d(TransposeModule):
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """2d transpose module.
 
@@ -924,13 +927,13 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 @set_rt_mode(1, 1, 1)
 class Transpose3d(TransposeModule):
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        axes: Optional[Sequence[int]] = None,
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            axes: Optional[Sequence[int]] = None,
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """3d transpose module.
 
@@ -981,6 +984,258 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(8, 8, 0)
+class Delay_FullConn(FunctionalModule):
+    """Full connection whose source is fanned out through `delay` delay neurons."""
+
+    def __init__(
+            self,
+            neuron_s: Union[NeuDyn, InputProj],
+            neuron_d: Union[NeuDyn, InputProj],
+            delay: int,
+            weights: DataArrayType = 1,
+            conn_type: ConnType = ConnType.MatConn,
+            keep_shape: bool = False,
+            name: Optional[str] = None,
+            **kwargs,
+    ) -> None:
+        if delay < 1:  # build() creates one branch per delay step, so it needs at least one
+            raise ValueError(f"delay must be a positive integer, but got {delay}.")
+        self.delay = delay
+        self.weights = weights
+        self.conn_type = conn_type
+        super().__init__(
+            neuron_s,
+            neuron_d,
+            shape_out=neuron_d.shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        return  # no simulation behaviour is implemented here; the call yields None
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        """Swap this module for `delay` (delay neuron, copy synapse, weight synapse) branches."""
+        src, dest = self.module_intf.operands[0], self.module_intf.operands[1]
+        if len(src.shape_out) != 2:
+            raise ShapeError("The source node must be a successor to the half-convolution")
+        delay_shape = src.shape_out
+        delay_neurons, synapses = [], []
+        for i in range(self.delay):
+            neuron = Neuron(
+                shape=delay_shape,
+                leak_v=0,
+                neg_threshold=0,
+                delay=i + 1,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                keep_shape=self.keep_shape,
+                name=f"n{i}_{self.name}",
+            )
+            syn1 = FullConnSyn(  # delay synapse: source -> i-th delay neuron
+                src, neuron,
+                weights=_delay_mapping(delay_shape[1], delay_shape[0], 1),
+                conn_type=ConnType.All2All,
+                name=f"s{i}_delay_{self.name}",  # module suffix, as Conv_HalfRoll names its own
+            )
+            syn2 = FullConnSyn(  # weight rows i, i + delay, ... drive this branch
+                neuron, dest,
+                weights=self.weights[i::self.delay, :],
+                conn_type=self.conn_type,
+                name=f"s{i}_{self.name}",
+            )
+            network._add_components(neuron, syn1, syn2)
+            delay_neurons.append(neuron)
+            synapses.extend((syn1, syn2))
+        network._remove_components(self)  # once, after every branch has been added
+        return [*delay_neurons, *synapses]
+
+
+@set_rt_mode(8, 8, 0)
+class Conv_HalfRoll(FunctionalModule):
+    _spatial_ndim: ClassVar[int] = 2
+
+    def __init__(
+            self,
+            neuron_s: Union[NeuDyn, InputProj],
+            kernel: np.ndarray,
+            stride: Optional[_Size2Type] = None,
+            padding: _Size2Type = 0,
+            keep_shape: bool = False,
+            name: Optional[str] = None,
+            **kwargs,
+    ) -> None:
+        """2d half-roll convolution for spikes.
+
+        Args:
+            neuron_s: source node; an output shape that is not 2-d is checked as "CHW"
+                and the node is reshaped to (channels, height).
+            kernel: 4-d kernel, unpacked as (cout, cin, kh, kw).
+            stride: convolution stride; `None` falls back to 1.
+            padding: convolution padding.
+
+        Raises:
+            ShapeError: if the kernel is not 4-d or `cin` differs from the input channels.
+        """
+        self.kernel = kernel
+        # A `None` stride cannot take part in the out_h floor-division below.
+        self.stride = _pair(1 if stride is None else stride)
+        self.padding = _pair(padding)
+        if kernel.ndim != self._spatial_ndim + 2:
+            raise ShapeError(
+                f"convolution kernel dimension must be {self._spatial_ndim + 2}, but got {kernel.ndim}."
+            )
+
+        if len(neuron_s.shape_out) != 2:
+            in_ch, in_h, _ = _fm_ndim2_check(neuron_s.shape_out, "CHW")
+            neuron_s.shape_change((in_ch, in_h))  # resizes the source node in place
+        in_ch, in_h = neuron_s.shape_out
+        cout, cin, kh, kw = kernel.shape
+        if in_ch != cin:
+            raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
+
+        out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
+        super().__init__(
+            neuron_s,
+            shape_out=(cout, out_h),
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        # No simulation behaviour is implemented here; the call yields None.
+        return
+
+    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+        """Expand into one delay neuron and two synapses per kernel column, feeding one neuron."""
+        _, in_h = self.module_intf.operands[0].shape_out
+        _, cin, _, kw = self.kernel.shape
+        n_delays = NodeList()
+        s_delays = NodeList()
+        relu = Neuron(
+            self.shape_out,
+            reset_mode=RM.MODE_NONRESET,
+            neg_thres_mode=NTM.MODE_SATURATION,
+            leak_v=0,
+            neg_threshold=0,
+            pos_threshold=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start+1,
+            tick_wait_end=self.tick_wait_end,
+            input_width=self.input_width,
+            spike_width=self.spike_width,
+            snn_en=self.snn_en,
+            keep_shape=self.keep_shape,
+            name=f"nd_{self.name}",
+        )
+        for i in range(kw):
+            neuron = Neuron(
+                (cin, in_h),
+                leak_v=0,
+                neg_threshold=0,
+                delay=delay*i+1,  # column i is delayed by delay * i + 1
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                input_width=self.input_width,
+                spike_width=self.spike_width,
+                snn_en=self.snn_en,
+                keep_shape=self.keep_shape,
+                name=f"n{i}_{self.name}",
+            )
+            n_delays.append(neuron)
+            # Delay synapse: source -> i-th delay neuron.
+            syn1 = FullConnSyn(
+                self.module_intf.operands[0],
+                n_delays[i],
+                weights=_delay_mapping(in_h, cin, 1),
+                conn_type=ConnType.All2All,
+                name=f"s{i}_delay_{self.name}",
+            )
+            s_delays.append(syn1)
+            syn2 = Conv2dHalfRollSyn(  # cin, ih -> cout * oh
+                n_delays[i],
+                relu,
+                kernel=self.kernel[:, :, :, kw-i-1],  # kernel columns are taken last-to-first
+                stride=self.stride,
+                padding=self.padding,
+                order="OIHW",
+                name=f"s{i}_{self.name}",
+            )
+            s_delays.append(syn2)
+
+        generated = [relu, *n_delays, *s_delays]
+        self._rebuild_out_intf(network, relu, *generated, **build_options)
+
+        return generated
+
+@set_rt_mode(8, 8, 0)
+class Filter(FunctionalModule):
+    """In `spike_func`, pass the input on every (time_to_fire + 1)-th call and zeros otherwise."""
+    def __init__(
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            time_to_fire: int,
+            keep_shape: bool = False,
+            name: Optional[str] = None,
+            **kwargs,
+    ) -> None:
+        """Store the firing period; the output shape equals the input node's output shape."""
+        self.time_to_fire = time_to_fire
+        self.cur_time = 0  # calls counted since the last pass-through
+        super().__init__(
+            neuron,
+            shape_out=neuron.shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        if self.cur_time != self.time_to_fire:
+            self.cur_time += 1
+            return np.zeros_like(x1)
+        else:
+            self.cur_time = 0
+            return x1
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        """Replace the module with a gate neuron fed one-to-one by the source and by `inp1`."""
+        # NOTE(review): time_to_fire is not used below; only spike_func applies it.
+        inp1 = Always1Neuron((2,))
+        n1_filter = Neuron(
+            self.shape_out,
+            leak_v=0,
+            neg_threshold=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start,
+            tick_wait_end=self.tick_wait_end,
+            keep_shape=self.keep_shape,
+            name=f"filter_{self.name}",  # was the fixed "filter"; suffixed like the synapse names
+        )
+
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            n1_filter,
+            weights=1,
+            conn_type=ConnType.One2One,
+            name=f"s0_{self.name}",
+        )
+        syn2 = FullConnSyn(
+            inp1,
+            n1_filter,
+            weights=-128,
+            conn_type=ConnType.All2All,
+            name=f"s1_{self.name}",
+        )
+        # inp1 is the source of syn2, so it is added and returned with the other components.
+        network._add_components(inp1, n1_filter, syn1, syn2)
+        network._remove_components(self)
+        return [inp1, n1_filter, syn1, syn2]
+
+
 def _spike_func_sadd_ssub(
     vjt: VoltageType, pos_thres: int, reset_v: Optional[int] = None
 ) -> tuple[NeuOutType, VoltageType]:
@@ -1085,3 +1340,12 @@ def _transpose3d_mapping(
         ] = 1
 
     return mt
+
+
+def _delay_mapping(h: int, cin: int, n: int) -> WeightType:
+    """Return a (cin*h, n*cin*h) boolean mask made of h-by-h identity blocks.
+
+    Entry [i*h + k, j*h + k] is set for every block pair (i, j) and every offset k.
+    """
+    identity_block = np.eye(h, dtype=np.bool_)
+    return np.tile(identity_block, (cin, n * cin))
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 2180b13a..4f54a5dd 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -318,7 +318,7 @@ def _bit_truncate(self, vj: VoltageType) -> NeuOutType:
         """
 
         def _truncate() -> VoltageType:
-            if (vj >> self.bit_truncation) > 0:  # Saturate truncation
+            if (vj >> self.bit_truncation).all() > 0:  # Saturate truncation
                 return np.full_like(vj, _mask(8))
             elif self.bit_truncation == 0:
                 return self._vjt0
@@ -326,7 +326,6 @@ def _truncate() -> VoltageType:
                 return (vj << (8 - self.bit_truncation)) & _mask(8)
             else:
                 return (vj >> (self.bit_truncation - 8)) & _mask(8)
-
         v_truncated = np.where(
             self.thres_mode == TM.EXCEED_POSITIVE, _truncate(), self._vjt0
         )
@@ -499,7 +498,6 @@ def update(
             x = self.sum_inputs()
         else:
             x = np.atleast_1d(x)
-
         self._neu_out, self._vjt = super().update(x, self._vjt)
 
         idx = (self.timestamp + self.delay_relative - 1) % HwConfig.N_TIMESLOT_MAX
@@ -585,6 +583,25 @@ def _slice_attrs(
     def __getitem__(self, index) -> "NeuronSubView":
         return NeuronSubView(self, index)
 
+    def shape_change(self, new_shape: Shape) -> None:
+        #print(self.name,"shape change")
+        self._n_neuron = shape2num(new_shape)
+        self._shape = as_shape(new_shape)
+        self._vjt = self.init_param(0).astype(np.int32)
+        self.set_reset_value("_vjt", self._vjt)
+        self._inner_spike = self.init_param(0).astype(np.bool_)
+        self.set_reset_value("_inner_spike", self._inner_spike)
+        self.vj = self.init_param(0).astype(np.int32)
+        self.set_reset_value("vj", self.vj)
+        self.y = self.init_param(0).astype(np.int32)
+        self.set_reset_value("y", self.y)
+        self.delay_registers = np.zeros(
+                (HwConfig.N_TIMESLOT_MAX,) + self._inner_spike.shape, dtype=np.bool_
+            )
+        self.set_reset_value("delay_registers", self.delay_registers)
+
+        return
+
     @property
     def shape_in(self) -> tuple[int, ...]:
         return self._shape
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 46b37251..48619307 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -8,7 +8,7 @@
 from .base import Neuron
 from .utils import LEAK_V_MAX
 
-__all__ = ["IF", "LIF", "TonicSpiking", "PhasicSpiking", "SpikingRelu"]
+__all__ = ["IF", "LIF", "TonicSpiking", "PhasicSpiking", "SpikingRelu", "Always1Neuron"]
 
 
 class IF(Neuron):
diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index c0edd77a..f5167f5c 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,2 +1,2 @@
-from .base import FullConnectedSyn, FullConnSyn
+from .base import FullConnectedSyn, FullConnSyn, Conv2dHalfRollSyn
 from .transforms import ConnType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 52b09ae1..dd55f0da 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -18,6 +18,7 @@
     ConnType,
     Conv1dForward,
     Conv2dForward,
+    Conv2dHalfForward,
     ConvTranspose1dForward,
     ConvTranspose2dForward,
     Identity,
@@ -325,6 +326,42 @@ def __init__(
         )
 
 
+
+class Conv2dHalfRollSyn(FullConnectedSyn):
+
+    def __init__(
+            self,
+            source: Union[NeuDyn, InputProj],
+            dest: Neuron,
+            kernel: np.ndarray,
+            stride: tuple[int, int],
+            padding: tuple[int, int],
+            order: _KOrder4d = "OIHW",
+            name: Optional[str] = None,
+    ) -> None:
+        super().__init__(source, dest, name)
+        #print("进入halfroll")
+        if order == "IOHW":
+            _kernel = np.swapaxes(kernel, 0, 1)
+        else:
+            _kernel = kernel.copy()
+
+        # O,I,H,W
+        out_channels, in_channels, kernel_h = _kernel.shape
+        # C,H,W
+        if len(source.shape_out) == 2:
+            in_ch, in_h = source.shape_out
+        else:
+            in_ch, in_h, in_w = _fm_ndim2_check(source.shape_out, "CHW")
+        out_h = (in_h + 2 * padding[0] - kernel_h) // stride[0] + 1
+
+        if in_ch != in_channels:
+            raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
+
+        #comm = Conv2dForward((in_h, in_w), (out_h, out_w), _kernel, stride, padding)
+        self.comm = Conv2dHalfForward((in_ch, in_h), (out_channels, out_h), _kernel, stride, padding)
+        #print(self.comm.connectivity)
+
 class ConvTranspose1dSyn(FullConnectedSyn):
     _spatial_ndim: ClassVar[int] = 1
 
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index a7235084..64194f16 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -183,6 +183,28 @@ def _conv2d_unroll(
     return w_unrolled
 
 
+def _conv2d_halfroll(
+        in_shape: Size2Type,
+        out_shape: Size2Type,
+        kernel: WeightType,
+        stride: Size2Type,
+        padding: Size2Type,
+) -> WeightType:
+    cout, cin, kh= kernel.shape
+    #ih = in_shape[1] + 2 * padding[0]
+    ih = in_shape[1]
+    o_ch, oh = out_shape
+    w_np = np.zeros((cin * ih, cout * oh), dtype=kernel.dtype)
+    for i in range(cout):
+        for j in range(cin):
+            for k in range(oh-2*padding[0]):
+                w_np[j*ih+k*stride[1]:j*ih+k*stride[1]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
+            for k in range(padding[0]):
+                w_np[j*ih+k*stride[1]:j*ih+k*stride[1]+kh-padding[0], i*oh+k] = kernel[i, j, k+1:]
+                w_np[j*ih+ih-padding[0]-1:j*ih+ih, (i+1)*oh-1] = kernel[i, j, :-(k+1)]
+    return w_np
+
+
 def _pool2d_kernel_unroll(
     channels: int,
     in_shape: Size2Type,
diff --git a/paibox/components/synapses/synapses.py b/paibox/components/synapses/synapses.py
index 8c52d7eb..c7a644ea 100644
--- a/paibox/components/synapses/synapses.py
+++ b/paibox/components/synapses/synapses.py
@@ -95,15 +95,15 @@ def __init__(
 
 class Conv1d(Conv1dSyn):
     def __init__(
-        self,
-        source: Union[Neuron, InputProj],
-        dest: Neuron,
-        kernel: np.ndarray,
-        *,
-        stride: _Size1Type = 1,
-        padding: _Size1Type = 0,
-        kernel_order: _KOrder3d = "OIL",
-        name: Optional[str] = None,
+            self,
+            source: Union[Neuron, InputProj],
+            dest: Neuron,
+            kernel: np.ndarray,
+            *,
+            stride: _Size1Type = 1,
+            padding: _Size1Type = 0,
+            kernel_order: _KOrder3d = "OIL",
+            name: Optional[str] = None,
     ) -> None:
         """1d convolution synapses in fully-unrolled format.
 
@@ -137,15 +137,15 @@ def __init__(
 
 class Conv2d(Conv2dSyn):
     def __init__(
-        self,
-        source: Union[Neuron, InputProj],
-        dest: Neuron,
-        kernel: np.ndarray,
-        *,
-        stride: _Size2Type = 1,
-        padding: _Size2Type = 0,
-        kernel_order: _KOrder4d = "OIHW",
-        name: Optional[str] = None,
+            self,
+            source: Union[Neuron, InputProj],
+            dest: Neuron,
+            kernel: np.ndarray,
+            *,
+            stride: _Size2Type = 1,
+            padding: _Size2Type = 0,
+            kernel_order: _KOrder4d = "OIHW",
+            name: Optional[str] = None,
     ) -> None:
         """2d convolution synapses in fully-unrolled format.
 
@@ -179,16 +179,16 @@ def __init__(
 
 class ConvTranspose1d(ConvTranspose1dSyn):
     def __init__(
-        self,
-        source: Union[Neuron, InputProj],
-        dest: Neuron,
-        kernel: np.ndarray,
-        *,
-        stride: _Size1Type = 1,
-        padding: _Size1Type = 0,
-        output_padding: _Size1Type = 0,
-        kernel_order: _KOrder3d = "OIL",
-        name: Optional[str] = None,
+            self,
+            source: Union[Neuron, InputProj],
+            dest: Neuron,
+            kernel: np.ndarray,
+            *,
+            stride: _Size1Type = 1,
+            padding: _Size1Type = 0,
+            output_padding: _Size1Type = 0,
+            kernel_order: _KOrder3d = "OIL",
+            name: Optional[str] = None,
     ) -> None:
         """1d transposed convolution synapses in fully-unrolled format.
 
@@ -226,16 +226,16 @@ def __init__(
 
 class ConvTranspose2d(ConvTranspose2dSyn):
     def __init__(
-        self,
-        source: Union[Neuron, InputProj],
-        dest: Neuron,
-        kernel: np.ndarray,
-        *,
-        stride: _Size2Type = 1,
-        padding: _Size2Type = 0,
-        output_padding: _Size2Type = 0,
-        kernel_order: _KOrder4d = "OIHW",
-        name: Optional[str] = None,
+            self,
+            source: Union[Neuron, InputProj],
+            dest: Neuron,
+            kernel: np.ndarray,
+            *,
+            stride: _Size2Type = 1,
+            padding: _Size2Type = 0,
+            output_padding: _Size2Type = 0,
+            kernel_order: _KOrder4d = "OIHW",
+            name: Optional[str] = None,
     ) -> None:
         """2d transposed convolution synapses in fully-unrolled format.
 
@@ -272,3 +272,4 @@ def __init__(
             kernel_order,
             name,
         )
+
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 205cd096..dcc0d0af 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -23,6 +23,7 @@
     _conv1d_unroll,
     _conv2d_faster,
     _conv2d_unroll,
+    _conv2d_halfroll,
     _convtranspose1d_faster,
     _convtranspose1d_unroll,
     _convtranspose2d_faster,
@@ -395,6 +396,38 @@ def connectivity(self):
             self.in_shape, self.out_shape, self.weights, self.stride, self.padding
         )
 
+class Conv2dHalfForward(Transform):
+    def __init__(
+        self,
+        in_shape: Size2Type,
+        out_shape: Size2Type,
+        kernel: np.ndarray,
+        stride: Size2Type,
+        padding: Size2Type,
+        # fm_order: _Order3d,
+    ) -> None:
+        self.in_shape = in_shape
+        self.out_shape = out_shape
+        self.stride = stride
+        self.padding = padding
+        self.kernel = kernel
+        # self.fm_order = fm_order
+
+        super().__init__(kernel)
+
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
+        # print(x)
+        # print(self.connectivity)
+        # print(x@self.connectivity)
+        return x @ self.connectivity
+
+
+    @property
+    def connectivity(self):
+        return _conv2d_halfroll(
+            self.in_shape, self.out_shape, self.kernel, self.stride, self.padding
+        )
+
 
 class ConvTranspose1dForward(Transform):
     def __init__(
diff --git a/paibox/network.py b/paibox/network.py
index 05b3c881..d399e7c1 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -73,11 +73,22 @@ def __call__(self, **kwargs) -> None:
     def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
+        try:
+            from .components.functional import Conv_HalfRoll
+        except ImportError:
+            Conv_HalfRoll = None
         generated = dict()
-        modules = network.nodes().subset(NeuModule).unique()
-
+        modules = network.components.subset(NeuModule).unique()
+        delay = 0
         for module in modules.values():
-            generated[module] = module.build(network, **build_options)
+            if Conv_HalfRoll is not None and isinstance(module, Conv_HalfRoll):
+                #print(module.stride)
+                generated[module] = module.build(network, module.stride[1] ** (delay), **build_options)
+                if module.stride[1] != 1 :
+                    delay += 1
+
+            else:
+                generated[module] = module.build(network, **build_options)
 
         return generated
 

From 935f4fe76340c6bae34b8e31bb9dd35655a69362 Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Fri, 12 Jul 2024 15:58:24 +0800
Subject: [PATCH 029/187] add tests for conv_semimap

---
 hzy/hzy_test.py                          | 247 ++++++--------------
 paibox/components/functional.py          |  83 ++++---
 paibox/components/synapses/base.py       |   8 +-
 paibox/components/synapses/conv_utils.py | 280 ++++++++++++-----------
 paibox/network.py                        |  18 +-
 tests/components/test_functional.py      |  49 +++-
 tests/shared_networks.py                 |  30 +++
 7 files changed, 360 insertions(+), 355 deletions(-)

diff --git a/hzy/hzy_test.py b/hzy/hzy_test.py
index 7063cfc4..07d5fa97 100644
--- a/hzy/hzy_test.py
+++ b/hzy/hzy_test.py
@@ -8,148 +8,6 @@
 from paibox.simulator.utils import _conv2d_faster_fp32
 
 
-class fcnet_2layer_dual_port(pb.Network):
-    def __init__(self, weight1, Vthr1, weight2, Vthr2):
-        super().__init__()
-
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(5,))
-        self.i2 = pb.InputProj(input=pe,shape_out=(5,))
-        self.n1 = pb.IF(10, threshold=Vthr1, reset_v=0, name="delay_1")
-        self.s1 = pb.FullConn(
-            self.i1,
-            self.n1,
-            weights=weight1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.n2 = pb.IF(
-            5, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="delay_2"
-        )
-        self.s2 = pb.FullConn(
-            self.i2,
-            self.n2,
-            weights=weight1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.n3 = pb.IF(
-            20, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="IF_1"
-        )
-        self.s3 = pb.FullConn(
-            self.n1,
-            self.n3,
-            weights=weight1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s4 = pb.FullConn(
-            self.n2,
-            self.n3,
-            weights=weight1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        # self.n3 = pb.IF(
-        #     20, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="IF_2"
-        # )
-        # self.s4 = pb.FullConn(
-        #     self.n2,
-        #     self.n3,
-        #     weights=weight1,
-        #     conn_type=pb.SynConnType.All2All,
-        # )
-
-        # tick_wait_start = 2 for second layer
-
-        # self.n3 = pb.IF(
-        #     5, threshold=Vthr2, reset_v=0, tick_wait_start=2, name="batch_dual_port_o2"
-        # )
-        # self.s3 = pb.FullConn(
-        #     self.n1,
-        #     self.n2,
-        #     weights=weight2,
-        #     conn_type=pb.SynConnType.All2All,
-        # )
-        # self.s4 = pb.FullConn(
-        #     self.n1,
-        #     self.n3,
-        #     weights=weight2,
-        #     conn_type=pb.SynConnType.All2All,
-        # )
-        #
-        # self.probe1 = pb.Probe(target=self.n2, attr="spike")
-        # self.probe2 = pb.Probe(target=self.n3, attr="spike")
-
-class fcnet_3(pb.Network):
-    def __init__(self):
-        super().__init__()
-
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(2, 5, 5))
-        self.n1 = pb.IF((1, 7), threshold=1, reset_v=0, name="n_1")
-        self.n2 = pb.IF((1, 5, 5), threshold=1, reset_v=0, name="n_2")
-        self.n3 = pb.IF((1, 5, 5), threshold=1, reset_v=0, name="n_3")
-        self.n4 = pb.IF((1, 5), threshold=1, reset_v=0, name="n_4")
-        self.n5 = pb.IF((1, 3), threshold=1, reset_v=0, name="n_5")
-        self.n6 = pb.IF((1, 3), threshold=1, reset_v=0, name="n_6")
-        self.n7 = pb.IF((1, 3), threshold=1, reset_v=0, name="n_7")
-        self.s0 = pb.FullConn(
-            self.i1,
-            self.n1,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s1 = pb.FullConn(
-            self.n1,
-            self.n2,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s2 = pb.FullConn(
-            self.n2,
-            self.n3,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s3 = pb.FullConn(
-            self.n1,
-            self.n3,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s4 = pb.FullConn(
-            self.n3,
-            self.n4,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s5 = pb.FullConn(
-            self.n4,
-            self.n5,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s6 = pb.FullConn(
-            self.n3,
-            self.n5,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s7 = pb.FullConn(
-            self.n5,
-            self.n6,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s8 = pb.FullConn(
-            self.n6,
-            self.n7,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.s9 = pb.FullConn(
-            self.n5,
-            self.n7,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
 weight1 = np.random.randint(0, 10, size=(32, 1, 5, 5), dtype=np.int8)
 weight2 = np.random.randint(0, 10, size=(32, 32, 2, 2), dtype=np.int8)
 weight3 = np.random.randint(0, 10, size=(64, 32, 5, 5), dtype=np.int8)
@@ -243,50 +101,81 @@ def __init__(self):
 def out_bypass1(t, data1, *args, **kwargs):
     return data1
 
-input_data1 = np.array([[1,2,5,7,5],
-                       [2,0,8,8,2],
-                       [3,8,5,7,5],
-                       [4,9,2,5,4],
-                       [5,10,2,3,8],
-                       [0,0,0,0,0],
-                       [0,0,0,0,0],
-                       [0,0,0,0,0],
-                       [0,0,0,0,0]], dtype=np.int8)
+# input_data1 = np.array([[1,2,5,7,5],
+#                        [2,0,8,8,2],
+#                        [3,8,5,7,5],
+#                        [4,9,2,5,4],
+#                        [5,10,2,3,8],
+#                        [0,0,0,0,0],
+#                        [0,0,0,0,0],
+#                        [0,0,0,0,0],
+#                        [0,0,0,0,0],
+#                        [0,0,0,0,0]], dtype=np.int8)
+#
+# weight1 = np.array([[1,0],
+#  [0 ,1],
+#  [1 ,0],
+#  [0 ,1],
+#  [1 ,0],
+#  [0 ,1],
+#  [0 ,1],
+#  [0 ,0],
+#  [1 ,1]], dtype=np.int8)
+inpa = np.random.randint(0, 2, size=(1, 11, 11)).astype(np.int8)
+inpb = np.concatenate([inpa, np.zeros((1, 10, 11))], axis=1)
+weight = np.random.randint(0, 2, size=(3*3, 2), dtype=np.int8)
 class fcnet_5(pb.DynSysGroup):
     def __init__(self):
         super().__init__()
-        self.i1 = pb.InputProj(input=out_bypass1, shape_out=(1, 5))
-        self.conv1 = pb.ConvHalfRoll(self.i1, np.array([[[[2,1,2],[1,2,1],[1,2,3]]]], dtype=np.int8), 1, 1, tick_wait_start=1)
-        self.conv2 = pb.ConvHalfRoll(self.conv1, np.array([[[[-2,1,2],[1,-2,1],[1,2,-3]]]], dtype=np.int8),1,0, tick_wait_start=3)
-        # self.linear1 = pb.DelayFullConn(
-        #     self.n5,
-        #     self.n6,
-        #     delay=4,
-        #     weights=np.random.randint(0, 10, size=(1024, 256), dtype=np.int8),
-        #     conn_type=pb.SynConnType.All2All,
-        # )
-        # self.filter = pb.Filter(self.n8, 28)
-pb_net = fcnet_5()
-
-generated = pb.DynSysGroup.build_fmodule(pb_net)
+        self.i1 = pb.InputProj(input=out_bypass1, shape_out=(1, 11))
+        self.conv1 = pb.ConvHalfRoll(self.i1, np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8), 2, 0, tick_wait_start=1)
+        self.conv2 = pb.ConvHalfRoll(self.conv1, np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8), 1, 0, tick_wait_start=3)
+        self.linear1 = pb.DelayFullConn(
+            self.conv2,
+            2,
+            weights=weight,
+            conn_type=pb.SynConnType.All2All,
+            tick_wait_start=5
+        )
 
-sim1 = pb.Simulator(pb_net, start_time_zero=False)
+pb_net1 = fcnet_5()
+conv = pb_net1.conv2
+linear = pb_net1.linear1
+generated = pb.DynSysGroup.build_fmodule(pb_net1)
 
-for i in range(9):
-    pb.FRONTEND_ENV.save(data1=input_data1[i])
-    sim1.run(1)
-    #print(pb_net.nd_Conv_HalfRoll_0.output)
-    print(pb_net.nd_Conv_HalfRoll_1.output)
+sim1 = pb.Simulator(pb_net1, start_time_zero=False)
 
-output =_conv2d_faster_fp32(np.array([[[1,2,3,4,5],[2,0,8,9,10],[5,8,5,2,2],[7,8,7,5,3],[5,2,5,4,8]]]),
-                            np.array([[[[2,1,2],[1,2,1],[1,2,3]]]], dtype=np.int8),
-                            (1,1),
-                            (1,1))
-#print(output)
-output = _conv2d_faster_fp32(np.array([[[8,34,55,72,43],[43,56,64,71,48],[58,90,83,82,45],[59,82,76,73,37],[35,50,49,46,33]]]),
-                             np.array([[[[-2,1,2],[1,-2,1],[1,2,-3]]]], dtype=np.int8),(1,1),(0,0))
-print(output)
 
+probe_conv = pb.Probe(generated[conv][0], "output")
+probe_linear = pb.Probe(generated[linear][0], "output")
+sim1.add_probe(probe_conv)
+sim1.add_probe(probe_linear)
+for i in range(20):
+    pb.FRONTEND_ENV.save(data1=inpb[0][i])
+    sim1.run(1)
+    #print(pb_net1.nd_Delay_FullConn_0.output)
+    #sim2.run(1)
+for i in range(17):
+#     print(sim1.data[probe_conv][i])
+    print(sim1.data[probe_linear][i])
+data = np.array(sim1.data[probe_conv][8:15])
+print(data)
+#data = np.transpose(data, (1, 0))
+print(data)
+# output = data.ravel() @ weight
+# print(output)
+# output =_conv2d_faster_fp32(np.array([[[1,2,3,4,5],[2,0,8,9,10],[5,8,5,2,2],[7,8,7,5,3],[5,2,5,4,8]]]),
+#                             np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8),
+#                             (2,2),
+#                             (1,1))
+# output[output < 0] = 0
+# print(output)
+# #output = np.transpose(output, (0, 2, 1))
+#
+# #print(output.ravel() @ weight1)
+# output = _conv2d_faster_fp32(output, np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8),(2,2),(0,0))
+# output[output < 0] = 0
+# print(output)
 
 
 class deeplabv2(pb.DynSysGroup):
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index e230aae0..65cfe7c9 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -41,8 +41,9 @@
 from .projection import InputProj
 from .synapses import ConnType, FullConnSyn, Conv2dHalfRollSyn
 from .synapses.conv_types import _Size2Type
-from .synapses.conv_utils import _fm_ndim2_check, _pair
+from .synapses.conv_utils import _fm_ndim2_check, _pair, _conv2d_faster
 from .synapses.transforms import Conv2dForward, _Pool2dForward
+from ..simulator.utils import _conv2d_faster_fp32
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
@@ -989,21 +990,22 @@ class Delay_FullConn(FunctionalModule):
     def __init__(
             self,
             neuron_s: Union[NeuDyn, InputProj],
-            neuron_d: Union[NeuDyn, InputProj],
-            delay: int,
+            #neuron_d: Union[NeuDyn, InputProj],
+            out_feature: tuple[int, ...],
+            #delay: int,
             weights: DataArrayType = 1,
             conn_type: ConnType = ConnType.MatConn,
             keep_shape: bool = False,
             name: Optional[str] = None,
             **kwargs,
     ) -> None:
-        self.delay = delay
+        #self.delay =
         self.weights = weights
         self.conn_type = conn_type
-        _shape_out = neuron_d.shape_out
+        _shape_out = out_feature
         super().__init__(
             neuron_s,
-            neuron_d,
+            #neuron_d,
             shape_out=_shape_out,
             keep_shape=keep_shape,
             name=name,
@@ -1011,21 +1013,41 @@ def __init__(
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        return
+        output = x1 @ self.weights
+        return output
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        if len(self.module_intf.operands[0].shape_out)!=2:
+    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+        if len(self.module_intf.operands[0].shape_out) != 2:
             raise ShapeError("The source node must be a successor to the half-convolution")
         delay_shape = self.module_intf.operands[0].shape_out
         delay_neurons = []
-        for i in range(self.delay):
+        neuron_d = Neuron(
+            self.shape_out,
+            reset_mode=RM.MODE_NONRESET,
+            neg_thres_mode=NTM.MODE_SATURATION,
+            leak_v=0,
+            neg_threshold=0,
+            pos_threshold=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start+1,
+            tick_wait_end=self.tick_wait_end,
+            input_width=self.input_width,
+            spike_width=self.spike_width,
+            snn_en=self.snn_en,
+            keep_shape=self.keep_shape,
+            name=f"nd_{self.name}",
+        )
+        for i in range(delay_shape[1]):
             neuron = Neuron(
                 shape=delay_shape,
                 leak_v=0,
                 neg_threshold=0,
-                delay=i+1,
+                delay=delay*i+1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
+                input_width=self.input_width,
+                spike_width=self.spike_width,
+                snn_en=self.snn_en,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1039,17 +1061,18 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
                 name=f"s{i}_delay",
             )
             #w = np.zeros((neuron.num_out, self.module_intf.operands[1].num_out))
-            w = self.weights[i::self.delay, :]
+            w = self.weights[delay_shape[1]-i-1::delay_shape[1], :]
             syn2 = FullConnSyn(  # cin,(kw-1)*ih -> cout * oh
                 delay_neurons[i], # 54 -> 54
-                self.module_intf.operands[1],
+                neuron_d,
                 weights=w,
                 conn_type=self.conn_type,
                 name=f"s{i}_{self.name}",
             )
-            network._add_components(neuron, syn1, syn2)
-            network._remove_components(self)
-            generated = [*delay_neurons, syn1, syn2]
+
+            generated = [neuron_d, *delay_neurons, syn1, syn2]
+            self._rebuild_out_intf(network, neuron_d, *generated, **build_options)
+
         return generated
 
 
@@ -1080,20 +1103,16 @@ def __init__(
             )
 
         if len(neuron_s.shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(neuron_s.shape_out, "CHW")
-            neuron_s.shape_change((in_ch, in_h))
-        in_ch, in_h = neuron_s.shape_out
+            in_ch, in_h, in_w = neuron_s.shape_out
+        #     in_ch, in_h, in_w = _fm_ndim2_check(neuron_s.shape_out, "CHW")
+        #     neuron_s.shape_change((in_ch, in_h))
+        else:
+            in_ch, in_h, = neuron_s.shape_out
         cout, cin, kh, kw = kernel.shape
-        # if len(neuron_d.shape_out) != 2:
-        #     out_ch, out_h, out_w = _fm_ndim2_check(neuron_d.shape_out, "CHW")
-        #     neuron_d.shape_change((cout, out_h))
-
-
         out_h = (in_h - kh + 2 * self.padding[0] ) // self.stride[0] + 1
         if in_ch != cin:
             raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
 
-
         _shape_out = (cout, out_h)
 
         super().__init__(
@@ -1106,11 +1125,18 @@ def __init__(
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        #print("进入function.spike_func")
-        return
+        print("进入conv.spike_func")
+        print(x1)
+        #output = _conv2d_faster_fp32(x1, self.kernel, self.stride, self.padding)
+        output = _conv2d_faster(x1, self.shape_out, self.kernel, self.stride, self.padding)
+        output[output < 0] = 0
+        return output
 
     def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
         #print("进入build")
+        if len(self.module_intf.operands[0].shape_out) != 2:
+            in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
+            self.module_intf.operands[0].shape_change((in_ch, in_h))
         in_ch, in_h = self.module_intf.operands[0].shape_out
         cout, cin, kh, kw = self.kernel.shape
         n_delays = NodeList()
@@ -1212,6 +1238,9 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start,
             tick_wait_end=self.tick_wait_end,
+            input_width=self.input_width,
+            spike_width=self.spike_width,
+            snn_en=self.snn_en,
             keep_shape=self.keep_shape,
             name="filter"
         )
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index dd55f0da..fc944226 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -79,8 +79,14 @@ def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
         else:
             # Retrieve 0 to the dest neurons if it is not working
             synin = np.zeros_like(self.source.output)
-
+        # for i in range(5):
+        #     if self.name == f"s{i}_Conv_HalfRoll_1":
+        #         print(f"{self.name}", synin)
+        #         print(self.connectivity)
         self._synout = self.comm(synin).ravel()
+        # for i in range(5):
+        #     if self.name == f"s{i}_Conv_HalfRoll_1":
+        #         print(f"{self.name}", self._synout)
         return self._synout
 
     def reset_state(self, *args, **kwargs) -> None:
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 64194f16..61594193 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -61,11 +61,11 @@ def _fm_ndim2_check(fm_shape: SizeAnyType, fm_order: _Order3d) -> Size3Type:
 
 
 def _conv1d_unroll(
-    in_shape: Size1Type,
-    out_shape: Size1Type,
-    kernel: WeightType,
-    stride: Size1Type,
-    padding: Size1Type,
+        in_shape: Size1Type,
+        out_shape: Size1Type,
+        kernel: WeightType,
+        stride: Size1Type,
+        padding: Size1Type,
 ) -> WeightType:
     """Unroll the kernel of 1d convolution into a matrix."""
     cout, cin, kl = kernel.shape
@@ -80,9 +80,9 @@ def _conv1d_unroll(
         for ch_idx in np.ndindex(kernel.shape[:2]):
             # [0] -> o_ch, [1] -> i_ch
             zeros_image[
-                i * stride[0] + ch_idx[1] * il : i * stride[0] + ch_idx[1] * il + kl,
-                ch_idx[0],
-                i,
+            i * stride[0] + ch_idx[1] * il: i * stride[0] + ch_idx[1] * il + kl,
+            ch_idx[0],
+            i,
             ] = kernel[ch_idx[0], ch_idx[1], :]
 
         # if fm_order == "CL":
@@ -100,19 +100,19 @@ def _conv1d_unroll(
     nil = in_shape[0]
     w_unrolled = np.zeros((cin * nil, cout * ol), dtype=kernel.dtype)
     for i in range(cin):
-        w_unrolled[i * nil : i * nil + nil, :] = w_unrolled_np[
-            i * il + padding[0] : i * il + il - padding[0], :
-        ]
+        w_unrolled[i * nil: i * nil + nil, :] = w_unrolled_np[
+                                                i * il + padding[0]: i * il + il - padding[0], :
+                                                ]
 
     return w_unrolled
 
 
 def _conv2d_unroll(
-    in_shape: Size2Type,
-    out_shape: Size2Type,
-    kernel: WeightType,
-    stride: Size2Type,
-    padding: Size2Type,
+        in_shape: Size2Type,
+        out_shape: Size2Type,
+        kernel: WeightType,
+        stride: Size2Type,
+        padding: Size2Type,
 ) -> WeightType:
     """Unroll the kernel of 2d convolution into a matrix."""
     cout, cin, kh, kw = kernel.shape
@@ -133,21 +133,21 @@ def _conv2d_unroll(
             for ch_idx in np.ndindex(kernel.shape[:2]):
                 # [0] -> o_ch, [1] -> i_ch
                 zeros_image[
-                    i * stride[0]
-                    + ch_idx[1] * ih : i * stride[0]
-                    + ch_idx[1] * ih
-                    + kh,
-                    j * stride[1]
-                    + ch_idx[0] * iw : j * stride[1]
-                    + ch_idx[0] * iw
-                    + kw,
-                    i * ow + j,
+                i * stride[0]
+                + ch_idx[1] * ih: i * stride[0]
+                                  + ch_idx[1] * ih
+                                  + kh,
+                j * stride[1]
+                + ch_idx[0] * iw: j * stride[1]
+                                  + ch_idx[0] * iw
+                                  + kw,
+                i * ow + j,
                 ] = kernel[ch_idx[0], ch_idx[1], :, :]
 
             t = (
                 zeros_image[:, :, i * ow + j]
-                .reshape(cin * ih, cout, iw)
-                .transpose(1, 0, 2)
+                    .reshape(cin * ih, cout, iw)
+                    .transpose(1, 0, 2)
             )
             # else:
             #     # (cin*ih, cout, iw) -> (cout, cin, ih, iw)
@@ -168,15 +168,15 @@ def _conv2d_unroll(
 
     for i in range(cin):
         for j in range(nih):
-            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
+            w_unrolled[i * nin_size + j * niw: i * nin_size + j * niw + niw, :] = (
                 w_unrolled_np[
-                    i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1] : i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1]
-                    + niw,
-                    :,
+                i * in_size
+                + (padding[0] + j) * iw
+                + padding[1]: i * in_size
+                              + (padding[0] + j) * iw
+                              + padding[1]
+                              + niw,
+                :,
                 ]
             )
 
@@ -190,29 +190,33 @@ def _conv2d_halfroll(
         stride: Size2Type,
         padding: Size2Type,
 ) -> WeightType:
-    cout, cin, kh= kernel.shape
-    #ih = in_shape[1] + 2 * padding[0]
-    ih = in_shape[1]
+    cout, cin, kh = kernel.shape
+    ih = in_shape[1] + 2 * padding[0]
+    #ih = in_shape[1]
     o_ch, oh = out_shape
     w_np = np.zeros((cin * ih, cout * oh), dtype=kernel.dtype)
     for i in range(cout):
         for j in range(cin):
-            for k in range(oh-2*padding[0]):
-                w_np[j*ih+k*stride[1]:j*ih+k*stride[1]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
-            for k in range(padding[0]):
-                w_np[j*ih+k*stride[1]:j*ih+k*stride[1]+kh-padding[0], i*oh+k] = kernel[i, j, k+1:]
-                w_np[j*ih+ih-padding[0]-1:j*ih+ih, (i+1)*oh-1] = kernel[i, j, :-(k+1)]
+            if padding[0] == 0:
+                for k in range(oh):
+                    # w_np[j*ih+padding[0]*(stride[1]-1)+k*stride[1]:j*ih+padding[1]*(stride[1]-1)+k*stride[1]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
+                    # w_np[j*ih+stride[1]*(padding[0]+k)-padding[0]:j*ih+stride[1]*(padding[0]+k)-padding[0]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
+                    w_np[j * ih + k * stride[1]:j * ih + k * stride[1] + kh, i * oh + k] = kernel[i, j, :]
+            else:
+                for k in range(oh):
+                    w_np[j * ih + k * stride[1]:j * ih + k * stride[1] + kh, i * oh + k] = kernel[i, j, :]
+            w_np= np.delete(w_np, np.concatenate((np.arange(padding[0]), np.arange(ih-padding[0], ih))), axis=0)
     return w_np
 
 
 def _pool2d_kernel_unroll(
-    channels: int,
-    in_shape: Size2Type,
-    out_shape: Size2Type,
-    ksize: Size2Type,
-    stride: Size2Type,
-    padding: Size2Type,
-    # fm_order: str,
+        channels: int,
+        in_shape: Size2Type,
+        out_shape: Size2Type,
+        ksize: Size2Type,
+        stride: Size2Type,
+        padding: Size2Type,
+        # fm_order: str,
 ) -> WeightType:
     kh, kw = ksize
     ih = in_shape[0] + 2 * padding[0]
@@ -228,8 +232,8 @@ def _pool2d_kernel_unroll(
             zeros_image = np.zeros((channels * ih, iw * channels), dtype=np.bool_)
             for i_ch in range(channels):
                 zeros_image[
-                    (i * stride[0] + i_ch * ih) : (i * stride[0] + i_ch * ih) + kh,
-                    (j * stride[1] + i_ch * iw) : (j * stride[1] + i_ch * iw) + kw,
+                (i * stride[0] + i_ch * ih): (i * stride[0] + i_ch * ih) + kh,
+                (j * stride[1] + i_ch * iw): (j * stride[1] + i_ch * iw) + kw,
                 ] = 1
 
             temp = zeros_image.reshape((channels * ih, channels, iw)).transpose(1, 0, 2)
@@ -243,15 +247,15 @@ def _pool2d_kernel_unroll(
 
     for i in range(channels):
         for j in range(nih):
-            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
+            w_unrolled[i * nin_size + j * niw: i * nin_size + j * niw + niw, :] = (
                 w_unrolled_np[
-                    i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1] : i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1]
-                    + niw,
-                    :,
+                i * in_size
+                + (padding[0] + j) * iw
+                + padding[1]: i * in_size
+                              + (padding[0] + j) * iw
+                              + padding[1]
+                              + niw,
+                :,
                 ]
             )
 
@@ -259,13 +263,13 @@ def _pool2d_kernel_unroll(
 
 
 def _func_pool2d(
-    x_chw: NeuOutType,
-    out_shape: Size2Type,
-    ksize: Size2Type,
-    stride: Size2Type,
-    padding: Size2Type,
-    type: str,
-    threshold: int,
+        x_chw: NeuOutType,
+        out_shape: Size2Type,
+        ksize: Size2Type,
+        stride: Size2Type,
+        padding: Size2Type,
+        type: str,
+        threshold: int,
 ) -> NeuOutType:
     xcin, xh, xw = x_chw.shape
     kh, kw = ksize
@@ -288,17 +292,17 @@ def _func_pool2d(
                 if type == "avg":
                     out[c, i, j] = np.sum(
                         x_padded[
-                            c,
-                            stride[0] * i : stride[0] * i + kh,
-                            stride[1] * j : stride[1] * j + kw,
+                        c,
+                        stride[0] * i: stride[0] * i + kh,
+                        stride[1] * j: stride[1] * j + kw,
                         ]
                     )
                 else:
                     out[c, i, j] = np.max(
                         x_padded[
-                            c,
-                            stride[0] * i : stride[0] * i + kh,
-                            stride[1] * j : stride[1] * j + kw,
+                        c,
+                        stride[0] * i: stride[0] * i + kh,
+                        stride[1] * j: stride[1] * j + kw,
                         ]
                     )
 
@@ -311,11 +315,11 @@ def _func_pool2d(
 
 
 def _conv1d_faster(
-    x_cl: NeuOutType,
-    out_shape: Size1Type,
-    kernel: WeightType,
-    stride: Size1Type,
-    padding: Size1Type,
+        x_cl: NeuOutType,
+        out_shape: Size1Type,
+        kernel: WeightType,
+        stride: Size1Type,
+        padding: Size1Type,
 ) -> SynOutType:
     """Faster 1d convolution.
 
@@ -342,12 +346,12 @@ def _conv1d_faster(
 
 
 def _conv2d_faster(
-    x_chw: NeuOutType,
-    out_shape: Size2Type,
-    kernel: WeightType,
-    stride: Size2Type,
-    padding: Size2Type,
-    # fm_order: str,
+        x_chw: NeuOutType,
+        out_shape: Size2Type,
+        kernel: WeightType,
+        stride: Size2Type,
+        padding: Size2Type,
+        # fm_order: str,
 ) -> SynOutType:
     """Faster 2d convolution.
 
@@ -378,12 +382,12 @@ def _conv2d_faster(
 
 
 def _convtranspose1d_unroll(
-    in_shape: Size1Type,
-    out_shape: Size1Type,
-    kernel: WeightType,
-    stride: Size1Type,
-    padding: Size1Type,
-    output_padding: Size1Type,
+        in_shape: Size1Type,
+        out_shape: Size1Type,
+        kernel: WeightType,
+        stride: Size1Type,
+        padding: Size1Type,
+        output_padding: Size1Type,
 ) -> WeightType:
     """Unroll the kernel of 1d transposed convolution into a matrix.
 
@@ -404,12 +408,12 @@ def _convtranspose1d_unroll(
         for ch_idx in np.ndindex(kernel_flip.shape[:2]):
             # [0] -> o_ch, [1] -> i_ch
             zeros_image[
-                i * stride_transpose
-                + ch_idx[1] * il : i * stride_transpose
-                + ch_idx[1] * il
-                + kl,
-                ch_idx[0],
-                i,
+            i * stride_transpose
+            + ch_idx[1] * il: i * stride_transpose
+                              + ch_idx[1] * il
+                              + kl,
+            ch_idx[0],
+            i,
             ] = kernel_flip[ch_idx[0], ch_idx[1], :]
 
         t = zeros_image[:, :, i].T
@@ -421,9 +425,9 @@ def _convtranspose1d_unroll(
     nil = in_shape[0] + (in_shape[0] - 1) * (stride[0] - 1)
     w_unrolled_nk = np.zeros((cin * nil, cout * ol), dtype=kernel.dtype)
     for i in range(cin):
-        w_unrolled_nk[i * nil : i * nil + nil, :] = w_unrolled_np[
-            i * il + kl - 1 : i * il + kl - 1 + nil, :
-        ]
+        w_unrolled_nk[i * nil: i * nil + nil, :] = w_unrolled_np[
+                                                   i * il + kl - 1: i * il + kl - 1 + nil, :
+                                                   ]
 
     # stripe
     w_reshaped = w_unrolled_nk.reshape((cin, nil, cout, ol))
@@ -434,7 +438,7 @@ def _convtranspose1d_unroll(
     # padding
     # w_unrolled : (cin, in_shape[0], cout, ol - output_padding[0])
     w_unrolled = (
-        w_unrolled_ns[:, :, :, padding[0] : (-1 * padding[0])]
+        w_unrolled_ns[:, :, :, padding[0]: (-1 * padding[0])]
         if padding[0] > 0
         else w_unrolled_ns
     )
@@ -449,12 +453,12 @@ def _convtranspose1d_unroll(
 
 
 def _convtranspose2d_unroll(
-    in_shape: Size2Type,
-    out_shape: Size2Type,
-    kernel: WeightType,
-    stride: Size2Type,
-    padding: Size2Type,
-    output_padding: Size2Type,
+        in_shape: Size2Type,
+        out_shape: Size2Type,
+        kernel: WeightType,
+        stride: Size2Type,
+        padding: Size2Type,
+        output_padding: Size2Type,
 ) -> WeightType:
     """Unroll the kernel of 2d transposed convolution into a matrix."""
     kernel_flip = np.flip(kernel, axis=(2, 3))
@@ -478,21 +482,21 @@ def _convtranspose2d_unroll(
             for ch_idx in np.ndindex(kernel_flip.shape[:2]):
                 # [0] -> o_ch, [1] -> i_ch
                 zeros_image[
-                    i * stride_transpose[0]
-                    + ch_idx[1] * ih : i * stride_transpose[0]
-                    + ch_idx[1] * ih
-                    + kh,
-                    j * stride_transpose[1]
-                    + ch_idx[0] * iw : j * stride_transpose[1]
-                    + ch_idx[0] * iw
-                    + kw,
-                    i * ow + j,
+                i * stride_transpose[0]
+                + ch_idx[1] * ih: i * stride_transpose[0]
+                                  + ch_idx[1] * ih
+                                  + kh,
+                j * stride_transpose[1]
+                + ch_idx[0] * iw: j * stride_transpose[1]
+                                  + ch_idx[0] * iw
+                                  + kw,
+                i * ow + j,
                 ] = kernel_flip[ch_idx[0], ch_idx[1], :, :]
 
             t = (
                 zeros_image[:, :, i * ow + j]
-                .reshape(cin * ih, cout, iw)
-                .transpose(1, 0, 2)
+                    .reshape(cin * ih, cout, iw)
+                    .transpose(1, 0, 2)
             )
             for o_ch in range(cout):
                 w_unrolled_np[:, i * ow + j + o_ch * out_size] = t[o_ch].ravel()
@@ -545,12 +549,12 @@ def _convtranspose2d_unroll(
 
 
 def _convtranspose1d_faster(
-    x_cl: NeuOutType,
-    out_shape: Size1Type,
-    kernel: WeightType,
-    stride: Size1Type,
-    padding: Size1Type,
-    output_padding: Size1Type,
+        x_cl: NeuOutType,
+        out_shape: Size1Type,
+        kernel: WeightType,
+        stride: Size1Type,
+        padding: Size1Type,
+        output_padding: Size1Type,
 ) -> SynOutType:
     # (C, L)
     xc, xl = x_cl.shape
@@ -589,7 +593,7 @@ def _convtranspose1d_faster(
     out = out.T
 
     # inverse padding : (cout, (xl-1)*stride+kernel) -> (cout, (xl-1)*stride+kernel-2*padding)
-    out = out[:, padding[0] : (-1 * padding[0])] if padding[0] > 0 else out
+    out = out[:, padding[0]: (-1 * padding[0])] if padding[0] > 0 else out
 
     # output_padding
     out = np.pad(out, ((0, 0), (0, output_padding[0])), mode="constant")
@@ -598,12 +602,12 @@ def _convtranspose1d_faster(
 
 
 def _convtranspose2d_faster(
-    x_chw: NeuOutType,
-    out_shape: Size2Type,
-    kernel: WeightType,
-    stride: Size2Type,
-    padding: Size2Type,
-    output_padding: Size2Type,
+        x_chw: NeuOutType,
+        out_shape: Size2Type,
+        kernel: WeightType,
+        stride: Size2Type,
+        padding: Size2Type,
+        output_padding: Size2Type,
 ) -> SynOutType:
     # (C, H, W)
     xc, xh, xw = x_chw.shape
@@ -649,10 +653,10 @@ def _convtranspose2d_faster(
     # padding & output_padding
     # inverse padding
     out = out[
-        :,
-        padding[0] : (-1 * padding[0]) if padding[0] > 0 else None,
-        padding[1] : (-1 * padding[1]) if padding[1] > 0 else None,
-    ]
+          :,
+          padding[0]: (-1 * padding[0]) if padding[0] > 0 else None,
+          padding[1]: (-1 * padding[1]) if padding[1] > 0 else None,
+          ]
     # output_padding
     out = np.pad(
         out, ((0, 0), (0, output_padding[0]), (0, output_padding[1])), mode="constant"
@@ -662,7 +666,7 @@ def _convtranspose2d_faster(
 
 
 def _1d_im2col(
-    x_padded: NeuOutType, ol: int, kl: int, stride: Size1Type
+        x_padded: NeuOutType, ol: int, kl: int, stride: Size1Type
 ) -> NDArray[np.int64]:
     cols = np.zeros((ol, x_padded.shape[0] * kl), dtype=np.int64)
 
@@ -670,14 +674,14 @@ def _1d_im2col(
 
     idx = 0
     for i in range(0, pl - kl + 1, stride[0]):
-        cols[idx] = x_padded[:, i : i + kl].ravel()
+        cols[idx] = x_padded[:, i: i + kl].ravel()
         idx += 1
 
     return cols
 
 
 def _2d_im2col(
-    x_padded: NeuOutType, oh: int, ow: int, kh: int, kw: int, stride: Size2Type
+        x_padded: NeuOutType, oh: int, ow: int, kh: int, kw: int, stride: Size2Type
 ) -> NDArray[np.int64]:
     cols = np.zeros((oh * ow, x_padded.shape[0] * kh * kw), dtype=np.int64)
 
@@ -686,7 +690,7 @@ def _2d_im2col(
     idx = 0
     for i in range(0, ph - kh + 1, stride[0]):
         for j in range(0, pw - kw + 1, stride[1]):
-            cols[idx] = x_padded[:, i : i + kh, j : j + kw].ravel()
+            cols[idx] = x_padded[:, i: i + kh, j: j + kw].ravel()
             idx += 1
 
     return cols
diff --git a/paibox/network.py b/paibox/network.py
index d399e7c1..ce9f9d81 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -74,19 +74,19 @@ def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
         try:
-            from .components.functional import Conv_HalfRoll
+            from .components.functional import Conv_HalfRoll, Delay_FullConn
         except ImportError:
-            Conv_HalfRoll = None
+            Conv_HalfRoll, Delay_FullConn = None
         generated = dict()
-        modules = network.components.subset(NeuModule).unique()
-        delay = 0
+        modules = network.nodes().subset(NeuModule).unique()
+        delay = 1
         for module in modules.values():
             if Conv_HalfRoll is not None and isinstance(module, Conv_HalfRoll):
-                #print(module.stride)
-                generated[module] = module.build(network, module.stride[1] ** (delay), **build_options)
-                if module.stride[1] != 1 :
-                    delay += 1
-
+                generated[module] = module.build(network, delay, **build_options)
+                if module.stride[1] != 1:
+                    delay = delay*module.stride[1]
+            elif Delay_FullConn is not None and isinstance(module, Delay_FullConn):
+                generated[module] = module.build(network, delay, **build_options)
             else:
                 generated[module] = module.build(network, **build_options)
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index b50ee82a..629ac5f7 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -6,6 +6,7 @@
 from paibox.components import NeuModule
 from paibox.components.synapses.conv_utils import _pair
 from paibox.network import DynSysGroup
+from paibox.simulator.utils import _conv2d_faster_fp32
 from paibox.utils import as_shape, shape2num, typical_round
 
 
@@ -442,7 +443,7 @@ def test_SpikingPool2d(
         sim2.add_probe(probe_p2d)
 
         # Use binomial distribution to generate a sparse matrix with more zeros
-        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+        inpa = np.random.binomial(1, p_binomial, size=(20,)).astype(np.bool_)
 
         for i in range(20):
             pb.FRONTEND_ENV.save(data1=inpa[i])
@@ -645,3 +646,49 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.build(net1)
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
+
+    @pytest.mark.parametrize(
+        "shape, kernel, stride, padding, out_feature, weight",
+        [
+            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+             [1, 1], [0, 0], 10, np.random.randint(-5, 5, size=(7*7, 10), dtype=np.int8)),
+            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+             [1, 2], [0, 0], 10, np.random.randint(-5, 5, size=(4*4, 10), dtype=np.int8)),
+            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+             [2, 1], [0, 0], 10, np.random.randint(-5, 5, size=(3 * 3, 10), dtype=np.int8)),
+            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+             [2, 2], [0, 0], 10, np.random.randint(-5, 5, size=(2*2, 10), dtype=np.int8)),
+        ],
+    )
+    def test_Conv_HalfRoll_Net(self, shape, kernel, stride, padding, out_feature, weight):
+        from tests.shared_networks import Conv_HalfRoll_Net1, Conv_HalfRoll_Net2
+
+        #net1 = Conv_HalfRoll_Net1(shape, kernel, stride, padding, out_feature, delay, weight)
+        net2 = Conv_HalfRoll_Net2(shape, kernel, stride, padding, out_feature, weight)
+        conv = net2.conv2
+        linear = net2.linear1
+        generated = DynSysGroup.build_fmodule(net2)
+        #sim1 = pb.Simulator(net1, start_time_zero=False)
+        sim2 = pb.Simulator(net2, start_time_zero=False)
+
+        probe_conv = pb.Probe(generated[conv][0], "output")
+        probe_linear = pb.Probe(generated[linear][0], "output")
+        sim2.add_probe(probe_conv)
+        sim2.add_probe(probe_linear)
+        # Use binomial distribution to generate a sparse matrix with more zeros
+        inpa = np.random.randint(0, 5, size=(1, 11, 11)).astype(np.int8)
+        inpb = np.concatenate([inpa, np.zeros((1, 10, 11))], axis=1)
+        for i in range(17):
+            pb.FRONTEND_ENV.save(data1=inpb[0][i])
+            sim2.run(1)
+        expected = _conv2d_faster_fp32(np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0]))
+        expected[expected < 0] = 0
+        expected = _conv2d_faster_fp32(expected, kernel, _pair(stride[1]), _pair(padding[1]))
+        expected[expected < 0] = 0
+        # print(sim2.data[probe_conv][7:14])
+        # print(expected)
+        expected = expected.ravel() @ weight
+        expected[expected < 0] = 0
+        #expected = np.clip(expected, 0, 7)
+        print(expected)
+        print(sim2.data[probe_linear][15])
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 7a0b52b7..6f3a2553 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -216,6 +216,36 @@ def __init__(self, shape, axes):
         self.probe1 = pb.Probe(self.t3d, "spike")
         self.probe2 = pb.Probe(self.n2, "spike")
 
+class Conv_HalfRoll_Net1(pb.DynSysGroup):
+    def __init__(self, shape, kernel, stride, padding):
+        super().__init__()
+
+        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=(1, 11, 11))
+        self.conv1 = pb.ConvHalfRoll(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
+        self.conv2 = pb.ConvHalfRoll(self.conv1, kernel, stride[1], padding[1], tick_wait_start=3)
+        # self.linear1 = pb.DelayFullConn(
+        #     self.conv1,
+        #     2,
+        #     delay=3,
+        #     weights=weight,
+        #     conn_type=pb.SynConnType.All2All,
+        #     tick_wait_start=3
+        # )
+class Conv_HalfRoll_Net2(pb.DynSysGroup):
+    def __init__(self, shape, kernel, stride, padding, out_feature, weight):
+        super().__init__()
+
+        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
+        self.conv1 = pb.ConvHalfRoll(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
+        self.conv2 = pb.ConvHalfRoll(self.conv1, kernel, stride[1], padding[1], tick_wait_start=3)
+        self.linear1 = pb.DelayFullConn(
+            self.conv2,
+            out_feature,
+            weights=weight,
+            conn_type=pb.SynConnType.All2All,
+            tick_wait_start=5
+
+        )
 
 class ANNNetwork(pb.Network):
     def __init__(self):

From 097f61b5037165f3ad25d613aa29c66e97e1c433 Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Wed, 17 Jul 2024 22:19:17 +0800
Subject: [PATCH 030/187] add Linear, AvgPool2d_SemiMap, MaxPool2d_SemiMap
 operators

---
 paibox/__init__.py                       |   5 +-
 paibox/components/functional.py          | 603 +++++++++++++++++------
 paibox/components/neuron/base.py         |  13 +-
 paibox/components/synapses/__init__.py   |   2 +-
 paibox/components/synapses/base.py       |  24 +-
 paibox/components/synapses/transforms.py |   4 +-
 paibox/mixin.py                          |  11 +
 paibox/network.py                        |  14 +-
 8 files changed, 514 insertions(+), 162 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index 6f076fda..bdd2d29a 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -14,9 +14,12 @@
 from .components.functional import SpikingSub as SpikingSub
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
-from .components.functional import Conv_HalfRoll as ConvHalfRoll
+from .components.functional import Conv2dSemiMap as Conv2dSemiMap
 from .components.functional import Filter as Filter
 from .components.functional import Delay_FullConn as DelayFullConn
+from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
+from .components.functional import AvgPool2dSemiMap as AvgPool2dSemiMap
+from .components.functional import Linear as Linear
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 65cfe7c9..b1404765 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1,3 +1,4 @@
+import math
 import sys
 from collections.abc import Sequence
 from functools import partial
@@ -14,7 +15,6 @@
     VOLTAGE_DTYPE,
     IntScalarType,
     NeuOutType,
-    SpikeType,
     VoltageType,
     WeightType,
 )
@@ -39,11 +39,11 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, FullConnSyn, Conv2dHalfRollSyn
+from .synapses import ConnType, FullConnSyn, Conv2dHalfRollSyn, MaxPool2dSemiMapSyn
 from .synapses.conv_types import _Size2Type
-from .synapses.conv_utils import _fm_ndim2_check, _pair, _conv2d_faster
+from .synapses.conv_utils import _fm_ndim2_check, _pair
 from .synapses.transforms import Conv2dForward, _Pool2dForward
-from ..simulator.utils import _conv2d_faster_fp32
+
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
@@ -63,9 +63,12 @@
     "SpikingSub",
     "Transpose2d",
     "Transpose3d",
-    "Conv_HalfRoll",
+    "Conv2dSemiMap",
     "Filter",
-    "Delay_FullConn"
+    "Delay_FullConn",
+    "Linear",
+    "MaxPool2dSemiMap",
+    "AvgPool2dSemiMap",
 ]
 
 
@@ -74,13 +77,13 @@ class BitwiseAND(FunctionalModule2to1):
     inherent_delay = 0
 
     def __init__(
-        self,
-        neuron_a: Union[NeuDyn, InputProj],
-        neuron_b: Union[NeuDyn, InputProj],
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron_a: Union[NeuDyn, InputProj],
+            neuron_b: Union[NeuDyn, InputProj],
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Bitwise AND module. Do a bitwise AND of the output spike of two neurons & output.
 
@@ -144,12 +147,12 @@ class BitwiseNOT(FunctionalModule):
     inherent_delay = 0
 
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Bitwise NOT module. Do a bitwise NOT of the output spike of one neuron & output.
 
@@ -206,13 +209,13 @@ class BitwiseOR(FunctionalModule2to1):
     inherent_delay = 0
 
     def __init__(
-        self,
-        neuron_a: Union[NeuDyn, InputProj],
-        neuron_b: Union[NeuDyn, InputProj],
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron_a: Union[NeuDyn, InputProj],
+            neuron_b: Union[NeuDyn, InputProj],
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Bitwise OR module. Do a bitwise OR of the output spike of two neurons & output.
 
@@ -263,13 +266,13 @@ class BitwiseXOR(FunctionalModule2to1):
     inherent_delay = 1
 
     def __init__(
-        self,
-        neuron_a: Union[NeuDyn, InputProj],
-        neuron_b: Union[NeuDyn, InputProj],
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron_a: Union[NeuDyn, InputProj],
+            neuron_b: Union[NeuDyn, InputProj],
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Bitwise XOR module. Do a bitwise XOR of the output spike of two neurons & output.
 
@@ -343,13 +346,13 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 class DelayChain(FunctionalModule):
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        chain_level: int = 1,
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            chain_level: int = 1,
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Delay chain. It will add extra neurons (and identity synapses) as buffer.
 
@@ -404,7 +407,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_start=self.tick_wait_start + i + 1,
             tick_wait_end=self.tick_wait_end,
             delay=self.delay_relative,
-            name=f"n{i+1}_{self.name}",
+            name=f"n{i + 1}_{self.name}",
         )
         n_delaychain.append(n_out)  # Must append to the last.
 
@@ -422,7 +425,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
                 n_delaychain[i + 1],
                 1,
                 conn_type=ConnType.One2One,
-                name=f"s{i+1}_{self.name}",
+                name=f"s{i + 1}_{self.name}",
             )
 
             s_delaychain.append(s_delay)
@@ -438,18 +441,18 @@ class SpikingAdd(FunctionalModule2to1WithV):
     inherent_delay = 0
 
     def __init__(
-        self,
-        neuron_a: Union[NeuDyn, InputProj],
-        neuron_b: Union[NeuDyn, InputProj],
-        factor_a: IntScalarType = 1,
-        factor_b: IntScalarType = 1,
-        pos_thres: IntScalarType = 1,
-        reset_v: Optional[int] = None,
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        overflow_strict: bool = False,
-        **kwargs,
+            self,
+            neuron_a: Union[NeuDyn, InputProj],
+            neuron_b: Union[NeuDyn, InputProj],
+            factor_a: IntScalarType = 1,
+            factor_b: IntScalarType = 1,
+            pos_thres: IntScalarType = 1,
+            reset_v: Optional[int] = None,
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            overflow_strict: bool = False,
+            **kwargs,
     ) -> None:
         """Spiking Addition module. The result will be reflected in time dimension.
 
@@ -479,7 +482,7 @@ def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageTyp
         return _spike_func_sadd_ssub(vjt, self.pos_threshold, self.reset_v)
 
     def synaptic_integr(
-        self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
+            self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
     ) -> VoltageType:
         return _sum_inputs_sadd_ssub(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
@@ -523,15 +526,15 @@ class _SpikingPool2dWithV(FunctionalModuleWithV):
     inherent_delay = 0
 
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
-        pos_thres: Optional[int] = None,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            stride: Optional[_Size2Type] = None,
+            padding: _Size2Type = 0,
+            pos_thres: Optional[int] = None,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Basic 2d spiking pooling."""
         # C,H,W
@@ -602,17 +605,17 @@ class _SpikingPool2d(FunctionalModule):
     inherent_delay = 0
 
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        pool_type: Literal["avg", "max"],
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
-        threshold: Optional[int] = None,
-        # fm_order: _Order3d = "CHW",
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            pool_type: Literal["avg", "max"],
+            stride: Optional[_Size2Type] = None,
+            padding: _Size2Type = 0,
+            threshold: Optional[int] = None,
+            # fm_order: _Order3d = "CHW",
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """Basic 2d spiking pooling."""
         if pool_type not in ("avg", "max"):
@@ -688,17 +691,17 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 class SpikingAvgPool2d(_SpikingPool2d):
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
-        threshold: Optional[int] = None,
-        # fm_order: _Order3d = "CHW",
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            stride: Optional[_Size2Type] = None,
+            padding: _Size2Type = 0,
+            threshold: Optional[int] = None,
+            # fm_order: _Order3d = "CHW",
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """2d average pooling for spike. The input feature map is in 'CHW' order by default.
 
@@ -728,16 +731,16 @@ def __init__(
 
 class SpikingAvgPool2dWithV(_SpikingPool2dWithV):
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
-        threshold: Optional[int] = None,
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            stride: Optional[_Size2Type] = None,
+            padding: _Size2Type = 0,
+            threshold: Optional[int] = None,
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         super().__init__(
             neuron, kernel_size, stride, padding, threshold, keep_shape, name, **kwargs
@@ -753,16 +756,16 @@ class SpikingMaxPool2d(_SpikingPool2d):
     """
 
     def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
-        # fm_order: _Order3d = "CHW",
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
+            self,
+            neuron: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            stride: Optional[_Size2Type] = None,
+            padding: _Size2Type = 0,
+            # fm_order: _Order3d = "CHW",
+            *,
+            keep_shape: bool = True,
+            name: Optional[str] = None,
+            **kwargs,
     ) -> None:
         """2d max pooling for spike.
 
@@ -822,7 +825,7 @@ def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageTyp
         return _spike_func_sadd_ssub(vjt, self.pos_threshold)
 
     def synaptic_integr(
-        self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
+            self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
     ) -> VoltageType:
         return _sum_inputs_sadd_ssub(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
@@ -987,25 +990,27 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 @set_rt_mode(8, 8, 0)
 class Delay_FullConn(FunctionalModule):
+    "That operator is used on the first fully connected layer after the semimap-convolution."
+
     def __init__(
             self,
             neuron_s: Union[NeuDyn, InputProj],
-            #neuron_d: Union[NeuDyn, InputProj],
             out_feature: tuple[int, ...],
-            #delay: int,
             weights: DataArrayType = 1,
+            bias: Union[int, LeakVType] = 0,
             conn_type: ConnType = ConnType.MatConn,
             keep_shape: bool = False,
             name: Optional[str] = None,
             **kwargs,
     ) -> None:
-        #self.delay =
+        # self.delay =
         self.weights = weights
         self.conn_type = conn_type
+        self.bias = bias
         _shape_out = out_feature
         super().__init__(
             neuron_s,
-            #neuron_d,
+            # neuron_d,
             shape_out=_shape_out,
             keep_shape=keep_shape,
             name=name,
@@ -1025,11 +1030,11 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
             self.shape_out,
             reset_mode=RM.MODE_NONRESET,
             neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=0,
+            leak_v=self.bias,
             neg_threshold=0,
             pos_threshold=0,
             delay=self.delay_relative,
-            tick_wait_start=self.tick_wait_start+1,
+            tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=self.tick_wait_end,
             input_width=self.input_width,
             spike_width=self.spike_width,
@@ -1042,7 +1047,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
                 shape=delay_shape,
                 leak_v=0,
                 neg_threshold=0,
-                delay=delay*i+1,
+                delay=delay * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
                 input_width=self.input_width,
@@ -1060,10 +1065,10 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay",
             )
-            #w = np.zeros((neuron.num_out, self.module_intf.operands[1].num_out))
-            w = self.weights[delay_shape[1]-i-1::delay_shape[1], :]
+            # w = np.zeros((neuron.num_out, self.module_intf.operands[1].num_out))
+            w = self.weights[delay_shape[1] - i - 1::delay_shape[1], :]
             syn2 = FullConnSyn(  # cin,(kw-1)*ih -> cout * oh
-                delay_neurons[i], # 54 -> 54
+                delay_neurons[i],  # 54 -> 54
                 neuron_d,
                 weights=w,
                 conn_type=self.conn_type,
@@ -1077,26 +1082,28 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
 
 
 @set_rt_mode(8, 8, 0)
-class Conv_HalfRoll(FunctionalModule):
+class Conv2dSemiMap(FunctionalModule):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
             self,
             neuron_s: Union[NeuDyn, InputProj],
-            #neuron_d: Union[NeuDyn, InputProj],
+            # neuron_d: Union[NeuDyn, InputProj],
             kernel: np.ndarray,
             stride: Optional[_Size2Type] = None,
             padding: _Size2Type = 0,
+            bias: Union[int, LeakVType] = 0,
             keep_shape: bool = False,
             name: Optional[str] = None,
             **kwargs,
     ) -> None:
-        """2d conv_halfroll for spike.
+        """2d conv_semimap for ANN mode.
 
         """
         self.kernel = kernel
         self.stride = _pair(stride)
         self.padding = _pair(padding)
+        self.bias = bias
         if kernel.ndim != self._spatial_ndim + 2:
             raise ShapeError(
                 f"convolution kernel dimension must be {self._spatial_ndim + 2}, but got {kernel.ndim}."
@@ -1109,7 +1116,7 @@ def __init__(
         else:
             in_ch, in_h, = neuron_s.shape_out
         cout, cin, kh, kw = kernel.shape
-        out_h = (in_h - kh + 2 * self.padding[0] ) // self.stride[0] + 1
+        out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
         if in_ch != cin:
             raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
 
@@ -1117,7 +1124,7 @@ def __init__(
 
         super().__init__(
             neuron_s,
-            #neuron_d,
+            # neuron_d,
             shape_out=_shape_out,
             keep_shape=keep_shape,
             name=name,
@@ -1126,14 +1133,13 @@ def __init__(
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         print("进入conv.spike_func")
-        print(x1)
-        #output = _conv2d_faster_fp32(x1, self.kernel, self.stride, self.padding)
-        output = _conv2d_faster(x1, self.shape_out, self.kernel, self.stride, self.padding)
-        output[output < 0] = 0
-        return output
+        # print(x1)
+        # output = _conv2d_faster_fp32(x1, self.kernel, self.stride, self.padding)
+        # output[output < 0] = 0
+        return #output
 
     def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
-        #print("进入build")
+        # print("进入build")
         if len(self.module_intf.operands[0].shape_out) != 2:
             in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
             self.module_intf.operands[0].shape_change((in_ch, in_h))
@@ -1145,11 +1151,11 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
             self.shape_out,
             reset_mode=RM.MODE_NONRESET,
             neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=0,
+            leak_v=self.bias,
             neg_threshold=0,
             pos_threshold=0,
             delay=self.delay_relative,
-            tick_wait_start=self.tick_wait_start+1,
+            tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=self.tick_wait_end,
             input_width=self.input_width,
             spike_width=self.spike_width,
@@ -1162,7 +1168,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
                 (cin, in_h),
                 leak_v=0,
                 neg_threshold=0,
-                delay=delay*i+1,
+                delay=delay * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
                 input_width=self.input_width,
@@ -1174,7 +1180,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
             n_delays.append(neuron)
             # 延时突触
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],# (2, 5)
+                self.module_intf.operands[0],  # (2, 5)
                 n_delays[i],
                 weights=_delay_mapping(in_h, cin, 1),
                 conn_type=ConnType.All2All,
@@ -1184,7 +1190,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
             syn2 = Conv2dHalfRollSyn(  # cin, ih -> cout * oh
                 n_delays[i],
                 relu,
-                kernel=self.kernel[:, :, :, kw-i-1],
+                kernel=self.kernel[:, :, :, kw - i - 1],
                 stride=self.stride,
                 padding=self.padding,
                 order="OIHW",
@@ -1197,6 +1203,10 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
 
         return generated
 
+@deprecated(
+    "The backend currently does not support 'Filter', please use it in a future version",
+    category=PAIBoxDeprecationWarning,
+)
 @set_rt_mode(8, 8, 0)
 class Filter(FunctionalModule):
 
@@ -1265,8 +1275,298 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode(8, 8, 0)
+class Linear(FunctionalModule):
+    "FullConn for ANN mode"
+    def __init__(
+            self,
+            neuron_s: Union[NeuDyn, InputProj],
+            out_feature: tuple[int, ...],
+            weights: DataArrayType = 1,
+            bias: Union[int, LeakVType] = 0,
+            conn_type: ConnType = ConnType.MatConn,
+            keep_shape: bool = False,
+            name: Optional[str] = None,
+            **kwargs,
+    ) -> None:
+        self.weights = weights
+        self.conn_type = conn_type
+        self.bias = bias
+        _shape_out = out_feature
+        super().__init__(
+            neuron_s,
+            shape_out=_shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        output = x1.ravel() @ self.weights
+        output = output + self.bias
+        output[output < 0] = 0
+        return output
+
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
+        neuron_d = Neuron(
+            self.shape_out,
+            reset_mode=RM.MODE_NONRESET,
+            neg_thres_mode=NTM.MODE_SATURATION,
+            leak_v=self.bias,
+            neg_threshold=0,
+            pos_threshold=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start,
+            tick_wait_end=self.tick_wait_end,
+            input_width=self.input_width,
+            spike_width=self.spike_width,
+            snn_en=self.snn_en,
+            keep_shape=self.keep_shape,
+            name=f"nd_{self.name}",
+        )
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            neuron_d,
+            weights=self.weights,
+            conn_type=self.conn_type,
+            name=f"syn1_{self.name}",
+        )
+        generated = [neuron_d, syn1]
+        self._rebuild_out_intf(network, neuron_d, *generated, **build_options)
+
+        return generated
+
+
+@set_rt_mode(8, 8, 0)
+class MaxPool2dSemiMap(FunctionalModule):
+    """2d max pooling in semi-map form; `build()` expands it into delay neurons and synapses."""
+
+    _spatial_ndim: ClassVar[int] = 2
+
+    def __init__(
+            self,
+            neuron_s: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            stride: Optional[_Size2Type] = None,
+            keep_shape: bool = False,
+            name: Optional[str] = None,
+            **kwargs,
+    ) -> None:
+        """Derive the (channels, out_h) output shape from the source shape and the pooling window.
+        A scalar `kernel_size` is expanded to a pair; `stride=None` falls back to `kernel_size`.
+        """
+        self.kernel_size = _pair(kernel_size)
+        self.stride = _pair(stride) if stride is not None else self.kernel_size
+        self.pool_max = True
+        # Padding is not supported: `out_h` below is computed for an unpadded input.
+
+        # The source shape is either (C, H, W) or already (C, H); only C and H are used here.
+        if len(neuron_s.shape_out) != 2:
+            in_ch, in_h, in_w = neuron_s.shape_out
+        else:
+            in_ch, in_h, = neuron_s.shape_out
+        cout = cin = in_ch
+        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
+        if in_ch != cin:
+            raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
+
+        _shape_out = (cout, out_h)
+        # The width axis is not part of the output shape; `build()` loops over the kernel columns.
+        super().__init__(
+            neuron_s,
+            shape_out=_shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        """Placeholder: only emits a debug message and returns None."""
+        print("进入pool2d_func")
+        return
+
+    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+        """Generate one max-pooling neuron plus, per kernel column, a delay neuron and two synapses."""
+        if len(self.module_intf.operands[0].shape_out) != 2:
+            in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
+            self.module_intf.operands[0].shape_change((in_ch, in_h))
+        in_ch, in_h = self.module_intf.operands[0].shape_out
+        cout = cin = in_ch
+        kh, kw = self.kernel_size
+        n_delays = NodeList()
+        s_delays = NodeList()
+        pool2d = Neuron(
+            self.shape_out,
+            reset_mode=RM.MODE_NONRESET,
+            neg_thres_mode=NTM.MODE_SATURATION,
+            leak_v=0,
+            neg_threshold=0,
+            pos_threshold=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start + 1,
+            tick_wait_end=self.tick_wait_end,
+            input_width=self.input_width,
+            spike_width=self.spike_width,
+            snn_en=self.snn_en,
+            pool_max=self.pool_max,
+            keep_shape=self.keep_shape,
+            name=f"nd_{self.name}",
+        )
+        for i in range(kw):
+            neuron = Neuron(
+                (cin, in_h),
+                leak_v=0,
+                neg_threshold=0,
+                delay=delay * i + 1,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                input_width=self.input_width,
+                spike_width=self.spike_width,
+                snn_en=self.snn_en,
+                keep_shape=self.keep_shape,
+                name=f"n{i}_{self.name}",
+            )
+            n_delays.append(neuron)
+            # Delay synapse: connects the operand to the i-th delay neuron.
+            syn1 = FullConnSyn(
+                self.module_intf.operands[0],  # (2, 5)
+                n_delays[i],
+                weights=_delay_mapping(in_h, cin, 1),
+                conn_type=ConnType.All2All,
+                name=f"s{i}_delay_{self.name}",
+            )
+            s_delays.append(syn1)
+            syn2 = MaxPool2dSemiMapSyn(
+                n_delays[i],
+                pool2d,
+                weights=_pool2d_semimap((cin, in_h), self.shape_out, self.kernel_size, self.stride),
+                name=f"s{i}_{self.name}",
+            )
+            s_delays.append(syn2)
+
+        generated = [pool2d, *n_delays, *s_delays]
+        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
+
+        return generated
+
+
+@set_rt_mode(8, 8, 0)
+class AvgPool2dSemiMap(FunctionalModule):
+    """2d average pooling in semi-map form; `build()` expands it into delay neurons and synapses."""
+
+    _spatial_ndim: ClassVar[int] = 2
+
+    def __init__(
+            self,
+            neuron_s: Union[NeuDyn, InputProj],
+            kernel_size: _Size2Type,
+            stride: Optional[_Size2Type] = None,
+            keep_shape: bool = False,
+            name: Optional[str] = None,
+            **kwargs,
+    ) -> None:
+        """Derive the (channels, out_h) output shape from the source shape and the pooling window.
+        A scalar `kernel_size` is expanded to a pair; `stride=None` falls back to `kernel_size`.
+        """
+        self.kernel_size = _pair(kernel_size)
+        self.stride = _pair(stride) if stride is not None else self.kernel_size
+        # Padding is not supported: `out_h` below is computed for an unpadded input.
+
+        # The source shape is either (C, H, W) or already (C, H); only C and H are used here.
+        if len(neuron_s.shape_out) != 2:
+            in_ch, in_h, in_w = neuron_s.shape_out
+        else:
+            in_ch, in_h, = neuron_s.shape_out
+        cout = cin = in_ch
+        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
+        if in_ch != cin:
+            raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
+
+        _shape_out = (cout, out_h)
+        # The width axis is not part of the output shape; `build()` loops over the kernel columns.
+        super().__init__(
+            neuron_s,
+            shape_out=_shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        """Placeholder: only emits a debug message and returns None."""
+        print("进入pool2d_func")
+        return
+
+    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+        """Generate one summing neuron plus, per kernel column, a delay neuron and two synapses."""
+        if len(self.module_intf.operands[0].shape_out) != 2:
+            in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
+            self.module_intf.operands[0].shape_change((in_ch, in_h))
+        in_ch, in_h = self.module_intf.operands[0].shape_out
+        cout = cin = in_ch
+        kh, kw = self.kernel_size
+        # NOTE(review): int() floors log2, so this is exact only when kh * kw is a power of two.
+        bittrunc = int(math.log2(kw * kh) + 8)
+        n_delays = NodeList()
+        s_delays = NodeList()
+        pool2d = Neuron(
+            self.shape_out,
+            reset_mode=RM.MODE_NONRESET,
+            neg_thres_mode=NTM.MODE_SATURATION,
+            leak_v=0,
+            neg_threshold=0,
+            pos_threshold=0,
+            delay=self.delay_relative,
+            bit_truncation=bittrunc,
+            tick_wait_start=self.tick_wait_start + 1,
+            tick_wait_end=self.tick_wait_end,
+            input_width=self.input_width,
+            spike_width=self.spike_width,
+            snn_en=self.snn_en,
+            keep_shape=self.keep_shape,
+            name=f"nd_{self.name}",
+        )
+        for i in range(kw):
+            neuron = Neuron(
+                (cin, in_h),
+                leak_v=0,
+                neg_threshold=0,
+                delay=delay * i + 1,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                input_width=self.input_width,
+                spike_width=self.spike_width,
+                snn_en=self.snn_en,
+                keep_shape=self.keep_shape,
+                name=f"n{i}_{self.name}",
+            )
+            n_delays.append(neuron)
+            # Delay synapse: connects the operand to the i-th delay neuron.
+            syn1 = FullConnSyn(
+                self.module_intf.operands[0],  # (2, 5)
+                n_delays[i],
+                weights=_delay_mapping(in_h, cin, 1),
+                conn_type=ConnType.All2All,
+                name=f"s{i}_delay_{self.name}",
+            )
+            s_delays.append(syn1)
+            syn2 = FullConnSyn(
+                n_delays[i],
+                pool2d,
+                weights=_pool2d_semimap((cin, in_h), self.shape_out, self.kernel_size, self.stride),
+                conn_type=ConnType.All2All,
+                name=f"s{i}_{self.name}",
+            )
+            s_delays.append(syn2)
+
+        generated = [pool2d, *n_delays, *s_delays]
+        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
+
+        return generated
+
+
 def _spike_func_sadd_ssub(
-    vjt: VoltageType, pos_thres: int, reset_v: Optional[int] = None
+        vjt: VoltageType, pos_thres: int, reset_v: Optional[int] = None
 ) -> tuple[NeuOutType, VoltageType]:
     """Function `spike_func()` in spiking addition & subtraction."""
     # Fire
@@ -1288,7 +1588,7 @@ def _spike_func_sadd_ssub(
 
 
 def _spike_func_avg_pool(
-    vjt: VoltageType, pos_thres: int
+        vjt: VoltageType, pos_thres: int
 ) -> tuple[NeuOutType, VoltageType]:
     """Function `spike_func()` in spiking addition & subtraction."""
     # Fire
@@ -1305,7 +1605,7 @@ def _spike_func_avg_pool(
 
 
 def _sum_inputs_sadd_ssub(
-    x1: NeuOutType, x2: NeuOutType, f1: int, f2: int, vjt_pre: VoltageType, strict: bool
+        x1: NeuOutType, x2: NeuOutType, f1: int, f2: int, vjt_pre: VoltageType, strict: bool
 ) -> VoltageType:
     """Function `sum_input()` for spiking addition & subtraction."""
     incoming_v = (vjt_pre + x1 * f1 + x2 * f2).astype(VOLTAGE_DTYPE)
@@ -1343,7 +1643,7 @@ def _transpose2d_mapping(op_shape: tuple[int, ...]) -> WeightType:
 
 
 def _transpose3d_mapping(
-    op_shape: tuple[int, ...], axes: tuple[int, ...]
+        op_shape: tuple[int, ...], axes: tuple[int, ...]
 ) -> WeightType:
     """Get the mapping matrix for transpose of 3d array.
 
@@ -1372,9 +1672,28 @@ def _transpose3d_mapping(
 
 
 def _delay_mapping(h: int, cin: int, n: int) -> WeightType:
-    mt = np.zeros((cin * h, cin * n * h), dtype=np.bool_)
-    for i in range(cin):
-        for j in range(n * cin):
-            for k in range(h):
-                mt[i * h + k, j * h + k] = 1
+    # mt = np.zeros((cin * h, cin * n * h), dtype=np.bool_)
+    # for i in range(cin):
+    #     for j in range(n * cin):
+    #         for k in range(h):
+    #             mt[i * h + k, j * h + k] = 1
+    mt = np.eye(cin * h, dtype=np.int8)
+    return mt
+
+
+def _pool2d_semimap(
+        in_shape: _Size2Type,
+        out_shape: _Size2Type,
+        kernel_size: _Size2Type,
+        stride: _Size2Type,
+) -> WeightType:
+    """Build the channel-wise boolean pooling mask of shape (cin * ih, cin * oh)."""
+    cin, ih = in_shape
+    kh, oh = kernel_size[0], out_shape[1]
+    mt = np.zeros((cin * ih, cin * oh), dtype=np.bool_)
+    # NOTE(review): rows step by stride[1] while callers derive `oh` from stride[0] — confirm.
+    # Output (c, k) only reads the kh-row window of input channel c, never other channels.
+    for c in range(cin):
+        for k in range(oh):
+            mt[c * ih + k * stride[1]:c * ih + k * stride[1] + kh, c * oh + k] = 1
     return mt
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 4f54a5dd..e99068d5 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -16,6 +16,7 @@
     InputWidthFormat,
     SNNModeEnable,
     SpikeWidthFormat,
+    MaxPoolingEnable,
     get_core_mode,
 )
 
@@ -78,6 +79,7 @@ def __init__(
         input_width: InputWidthFormat,
         spike_width: SpikeWidthFormat,
         snn_en: SNNModeEnable,
+        pool_max: MaxPoolingEnable,
         overflow_strict: bool,
         keep_shape: bool = False,
     ) -> None:
@@ -90,6 +92,7 @@ def __init__(
         self.input_width = input_width
         self.spike_width = spike_width
         self.snn_en = snn_en
+        self.pool_max = pool_max
         # check whether the mode is valid
         self.mode = get_core_mode(input_width, spike_width, snn_en)
 
@@ -318,7 +321,7 @@ def _bit_truncate(self, vj: VoltageType) -> NeuOutType:
         """
 
         def _truncate() -> VoltageType:
-            if (vj >> self.bit_truncation).all() > 0:  # Saturate truncation
+            if (vj >> self.bit_truncation > 0).all():  # Saturate truncation
                 return np.full_like(vj, _mask(8))
             elif self.bit_truncation == 0:
                 return self._vjt0
@@ -329,7 +332,6 @@ def _truncate() -> VoltageType:
         v_truncated = np.where(
             self.thres_mode == TM.EXCEED_POSITIVE, _truncate(), self._vjt0
         )
-
         return v_truncated.astype(NEUOUT_U8_DTYPE)
 
     def _aux_pre_hook(self) -> None:
@@ -420,6 +422,7 @@ def __init__(
         input_width: Union[L[1, 8], InputWidthFormat] = InputWidthFormat.WIDTH_1BIT,
         spike_width: Union[L[1, 8], SpikeWidthFormat] = SpikeWidthFormat.WIDTH_1BIT,
         snn_en: bool = True,
+        pool_max: bool = False,
         unrolling_factor: int = 1,
         overflow_strict: bool = False,
         keep_shape: bool = True,
@@ -455,6 +458,7 @@ def __init__(
             _input_width_format(input_width),
             _spike_width_format(spike_width),
             SNNModeEnable(snn_en),
+            MaxPoolingEnable(pool_max),
             overflow_strict,
             keep_shape,
         )
@@ -495,7 +499,10 @@ def update(
             return None
 
         if x is None:
-            x = self.sum_inputs()
+            if not self.pool_max:
+                x = self.sum_inputs()
+            else:
+                x = self.max_inputs()
         else:
             x = np.atleast_1d(x)
         self._neu_out, self._vjt = super().update(x, self._vjt)
diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index f5167f5c..0ae6e3e9 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,2 +1,2 @@
-from .base import FullConnectedSyn, FullConnSyn, Conv2dHalfRollSyn
+from .base import FullConnectedSyn, FullConnSyn, Conv2dHalfRollSyn, MaxPool2dSemiMapSyn
 from .transforms import ConnType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index fc944226..8600824d 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -25,6 +25,7 @@
     MaskedLinear,
     OneToOne,
     Transform,
+    _CompareMax,
 )
 
 RIGISTER_MASTER_KEY_FORMAT = "{0}.output"
@@ -79,14 +80,9 @@ def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
         else:
             # Retrieve 0 to the dest neurons if it is not working
             synin = np.zeros_like(self.source.output)
-        # for i in range(5):
-        #     if self.name == f"s{i}_Conv_HalfRoll_1":
-        #         print(f"{self.name}", synin)
-        #         print(self.connectivity)
+
         self._synout = self.comm(synin).ravel()
-        # for i in range(5):
-        #     if self.name == f"s{i}_Conv_HalfRoll_1":
-        #         print(f"{self.name}", self._synout)
+
         return self._synout
 
     def reset_state(self, *args, **kwargs) -> None:
@@ -364,9 +360,7 @@ def __init__(
         if in_ch != in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
-        #comm = Conv2dForward((in_h, in_w), (out_h, out_w), _kernel, stride, padding)
         self.comm = Conv2dHalfForward((in_ch, in_h), (out_channels, out_h), _kernel, stride, padding)
-        #print(self.comm.connectivity)
 
 class ConvTranspose1dSyn(FullConnectedSyn):
     _spatial_ndim: ClassVar[int] = 1
@@ -473,3 +467,15 @@ def __init__(
         self.comm = ConvTranspose2dForward(
             (in_h, in_w), (out_h, out_w), _kernel, stride, padding, output_padding
         )
+
+class MaxPool2dSemiMapSyn(FullConnectedSyn):
+
+    def __init__(
+            self,
+            source: Union[NeuDyn, InputProj],
+            dest: Neuron,
+            weights: DataArrayType = 1,
+            name: Optional[str] = None,
+    ) -> None:
+        super().__init__(source, dest, name)
+        self.comm = _CompareMax((self.num_in, self.num_out), weights)
\ No newline at end of file
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index dcc0d0af..323fcdae 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -68,6 +68,7 @@ class ConnType(Enum):
     """All-to-all connection."""
 
 
+
 def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
     """Convert raw weights to `np.ndarray` coarsely (without optimization).
 
@@ -416,9 +417,6 @@ def __init__(
         super().__init__(kernel)
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
-        # print(x)
-        # print(self.connectivity)
-        # print(x@self.connectivity)
         return x @ self.connectivity
 
 
diff --git a/paibox/mixin.py b/paibox/mixin.py
index f16af011..d8fdd620 100644
--- a/paibox/mixin.py
+++ b/paibox/mixin.py
@@ -154,6 +154,17 @@ def sum_inputs(self, *args, **kwargs) -> VoltageType:
 
         return np.asarray(output, dtype=VOLTAGE_DTYPE)
 
+    def max_inputs(self, *args, **kwargs) -> VoltageType:
+        output = None
+        for node in self.master_nodes.values():
+            if output is None:
+                output = node.output.copy()
+            else:
+                output = np.maximum(output, node.output.copy())
+
+        return np.asarray(output, dtype=VOLTAGE_DTYPE)
+
+
 
 class TimeRelatedNode(MixIn):
     """Add time-related properties for `NeuDyn` & `InputProj`."""
diff --git a/paibox/network.py b/paibox/network.py
index ce9f9d81..bb26d994 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -74,19 +74,27 @@ def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
         try:
-            from .components.functional import Conv_HalfRoll, Delay_FullConn
+            from .components.functional import Conv2dSemiMap, Delay_FullConn, MaxPool2dSemiMap, AvgPool2dSemiMap
         except ImportError:
-            Conv_HalfRoll, Delay_FullConn = None
+            Conv2dSemiMap = Delay_FullConn = MaxPool2dSemiMap = AvgPool2dSemiMap = None
         generated = dict()
         modules = network.nodes().subset(NeuModule).unique()
         delay = 1
         for module in modules.values():
-            if Conv_HalfRoll is not None and isinstance(module, Conv_HalfRoll):
+            if Conv2dSemiMap is not None and isinstance(module, Conv2dSemiMap):
                 generated[module] = module.build(network, delay, **build_options)
                 if module.stride[1] != 1:
                     delay = delay*module.stride[1]
             elif Delay_FullConn is not None and isinstance(module, Delay_FullConn):
                 generated[module] = module.build(network, delay, **build_options)
+            elif MaxPool2dSemiMap is not None and isinstance(module, MaxPool2dSemiMap):
+                generated[module] = module.build(network, delay, **build_options)
+                if module.stride[1] != 1:
+                    delay = delay*module.stride[1]
+            elif AvgPool2dSemiMap is not None and isinstance(module, AvgPool2dSemiMap):
+                generated[module] = module.build(network, delay, **build_options)
+                if module.stride[1] != 1:
+                    delay = delay*module.stride[1]
             else:
                 generated[module] = module.build(network, **build_options)
 

From 2430b2562618342d7eddf55877f29c83868b2cc7 Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Wed, 17 Jul 2024 22:20:32 +0800
Subject: [PATCH 031/187] add test cases for Linear, avgpool2d_semimap

---
 tests/components/test_functional.py | 151 ++++++++++++++++++++++------
 tests/shared_networks.py            |  46 ++++++---
 2 files changed, 149 insertions(+), 48 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 629ac5f7..5db08dae 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -11,7 +11,7 @@
 
 
 def _assert_build_fmodule(
-    network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
+        network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
 ):
     nodes = network.nodes().subset(DynamicSys).unique()
     assert len(nodes) == n_node_bef_build
@@ -412,16 +412,16 @@ def test_SpikingSub_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingPool2d(
-        self,
-        shape,
-        channels,
-        ksize,
-        stride,
-        padding,
-        threshold,
-        fm_order,
-        pool_type,
-        p_binomial,
+            self,
+            shape,
+            channels,
+            ksize,
+            stride,
+            padding,
+            threshold,
+            fm_order,
+            pool_type,
+            p_binomial,
     ):
         from tests.shared_networks import SpikingPool2d_Net
 
@@ -506,14 +506,14 @@ def test_SpikingPool2d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingAvgPool2dWithV(
-        self,
-        shape,
-        channels,
-        ksize,
-        stride,
-        padding,
-        threshold,
-        p_binomial,
+            self,
+            shape,
+            channels,
+            ksize,
+            stride,
+            padding,
+            threshold,
+            p_binomial,
     ):
         """NOTE: This function is a native implementation of SNNs and is therefore not  \
             compared to the ANN implementation."""
@@ -647,35 +647,65 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.parametrize(
+        "shape, kernel, stride, padding",
+        [
+            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+             [1, 1], [0, 0]),
+            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+             [1, 2], [0, 0]),
+            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+             [2, 1], [0, 0]),
+            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+             [2, 2], [0, 0]),
+        ],
+    )
+    def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
+        from tests.shared_networks import Conv2dSemiMap_Net1
+        net1 = Conv2dSemiMap_Net1(shape, kernel, stride, padding)
+        conv = net1.conv1
+        generated = DynSysGroup.build_fmodule(net1)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
+        probe_conv = pb.Probe(generated[conv][0], "output")
+        sim1.add_probe(probe_conv)
+        inpa = np.random.randint(0, 5, size=(3, 11, 11)).astype(np.int8)
+        inpb = np.concatenate([inpa, np.zeros((3, 10, 11))], axis=1)
+        for i in range(15):
+            pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
+            sim1.run(1)
+        expected = _conv2d_faster_fp32(np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0]))
+        expected[expected < 0] = 0
+        expected = expected & (1 << 8) - 1
+        # print(expected)
+        # print(sim1.data[probe_conv])
+
     @pytest.mark.parametrize(
         "shape, kernel, stride, padding, out_feature, weight",
         [
             ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [1, 1], [0, 0], 10, np.random.randint(-5, 5, size=(7*7, 10), dtype=np.int8)),
+             [1, 1], [0, 0], 10, np.random.randint(-5, 5, size=(7 * 7, 10), dtype=np.int8)),
             ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [1, 2], [0, 0], 10, np.random.randint(-5, 5, size=(4*4, 10), dtype=np.int8)),
+             [1, 2], [0, 0], 10, np.random.randint(-5, 5, size=(4 * 4, 10), dtype=np.int8)),
             ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
              [2, 1], [0, 0], 10, np.random.randint(-5, 5, size=(3 * 3, 10), dtype=np.int8)),
             ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [2, 2], [0, 0], 10, np.random.randint(-5, 5, size=(2*2, 10), dtype=np.int8)),
+             [2, 2], [0, 0], 10, np.random.randint(-5, 5, size=(2 * 2, 10), dtype=np.int8)),
         ],
     )
-    def test_Conv_HalfRoll_Net(self, shape, kernel, stride, padding, out_feature, weight):
-        from tests.shared_networks import Conv_HalfRoll_Net1, Conv_HalfRoll_Net2
+    def test_Conv2dSemiMap_Net(self, shape, kernel, stride, padding, out_feature, weight):
+        from tests.shared_networks import Conv2dSemiMap_Net2
 
-        #net1 = Conv_HalfRoll_Net1(shape, kernel, stride, padding, out_feature, delay, weight)
-        net2 = Conv_HalfRoll_Net2(shape, kernel, stride, padding, out_feature, weight)
+        net2 = Conv2dSemiMap_Net2(shape, kernel, stride, padding, out_feature, weight)
         conv = net2.conv2
         linear = net2.linear1
         generated = DynSysGroup.build_fmodule(net2)
-        #sim1 = pb.Simulator(net1, start_time_zero=False)
+        # sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
         probe_conv = pb.Probe(generated[conv][0], "output")
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim2.add_probe(probe_conv)
         sim2.add_probe(probe_linear)
-        # Use binomial distribution to generate a sparse matrix with more zeros
         inpa = np.random.randint(0, 5, size=(1, 11, 11)).astype(np.int8)
         inpb = np.concatenate([inpa, np.zeros((1, 10, 11))], axis=1)
         for i in range(17):
@@ -683,12 +713,67 @@ def test_Conv_HalfRoll_Net(self, shape, kernel, stride, padding, out_feature, we
             sim2.run(1)
         expected = _conv2d_faster_fp32(np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0]))
         expected[expected < 0] = 0
+
         expected = _conv2d_faster_fp32(expected, kernel, _pair(stride[1]), _pair(padding[1]))
         expected[expected < 0] = 0
-        # print(sim2.data[probe_conv][7:14])
-        # print(expected)
+
+        expected = np.array(expected, dtype=np.int32)
         expected = expected.ravel() @ weight
         expected[expected < 0] = 0
-        #expected = np.clip(expected, 0, 7)
-        print(expected)
-        print(sim2.data[probe_linear][15])
+        if (expected >> 8).all() > 0:
+            expected = np.full_like(expected, ((1 << 8) - 1))
+        else:
+            expected = expected & ((1 << 8) - 1)
+        # expected = np.clip(expected, 0, 7)
+        assert np.array_equal(expected, sim2.data[probe_linear][15])
+
+    @pytest.mark.parametrize(
+        "shape, kernel_size, stride, weight",
+        [
+            ((1, 8), (2, 2), [1, 1], np.random.randint(-5, 5, size=(6 * 6, 2), dtype=np.int8)),
+            ((1, 8), (2, 2), [2, 2], np.random.randint(-5, 5, size=(2 * 2, 2), dtype=np.int8)),
+        ],
+    )
+    def test_AvgPool2dSemiMap(self, shape, kernel_size, stride, weight):
+        from tests.shared_networks import AvgPool2dSemiMap_Net
+        net1 = AvgPool2dSemiMap_Net(shape, kernel_size, stride, weight)
+        avg = net1.avgpool2
+        linear = net1.linear1
+        generated = DynSysGroup.build_fmodule(net1)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
+        probe_linear = pb.Probe(generated[linear][0], "output")
+        probe_avg = pb.Probe(generated[avg][0], "output")
+        sim1.add_probe(probe_avg)
+        sim1.add_probe(probe_linear)
+        inpa = np.random.randint(0, 10, size=(1, 8, 8)).astype(np.int8)
+        inpb = np.concatenate([inpa, np.zeros((1, 10, 8))], axis=1)
+        for i in range(12):
+            pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
+            sim1.run(1)
+
+        #print(sim1.data[probe_avg])
+        #print(sim1.data[probe_linear])
+
+    @pytest.mark.parametrize(
+        "shape, weight1",
+        [
+            ((3, 5, 5), np.random.randint(-5, 5, size=(3 * 5 * 5, 10), dtype=np.int8)),
+        ],
+    )
+    def test_Linear(self, shape, weight1):
+        from tests.shared_networks import Linear_Net
+        net1 = Linear_Net(shape, weight1)
+        net2 = Linear_Net(shape, weight1)
+        linear = net2.linear1
+        generated = pb.DynSysGroup.build_fmodule(net2)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
+        sim2 = pb.Simulator(net2, start_time_zero=False)
+        probe_linear = pb.Probe(generated[linear][0], "output")
+        sim2.add_probe(probe_linear)
+        inpa = np.random.randint(0, 10, (3, 5, 5), dtype=np.int8)
+        for i in range(1):
+            pb.FRONTEND_ENV.save(data1=inpa)
+            sim1.run(1)
+            sim2.run(1)
+
+        assert np.array_equal(sim1.data[net1.probe1][0], sim2.data[probe_linear][0])
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 6f3a2553..d37bfc37 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -216,37 +216,53 @@ def __init__(self, shape, axes):
         self.probe1 = pb.Probe(self.t3d, "spike")
         self.probe2 = pb.Probe(self.n2, "spike")
 
-class Conv_HalfRoll_Net1(pb.DynSysGroup):
+class Conv2dSemiMap_Net1(pb.DynSysGroup):
     def __init__(self, shape, kernel, stride, padding):
         super().__init__()
 
-        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=(1, 11, 11))
-        self.conv1 = pb.ConvHalfRoll(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
-        self.conv2 = pb.ConvHalfRoll(self.conv1, kernel, stride[1], padding[1], tick_wait_start=3)
-        # self.linear1 = pb.DelayFullConn(
-        #     self.conv1,
-        #     2,
-        #     delay=3,
-        #     weights=weight,
-        #     conn_type=pb.SynConnType.All2All,
-        #     tick_wait_start=3
-        # )
-class Conv_HalfRoll_Net2(pb.DynSysGroup):
+        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
+        self.conv1 = pb.Conv2dSemiMap(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
+
+
+
+class Conv2dSemiMap_Net2(pb.DynSysGroup):
     def __init__(self, shape, kernel, stride, padding, out_feature, weight):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.ConvHalfRoll(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
-        self.conv2 = pb.ConvHalfRoll(self.conv1, kernel, stride[1], padding[1], tick_wait_start=3)
+        self.conv1 = pb.Conv2dSemiMap(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
+        self.conv2 = pb.Conv2dSemiMap(self.conv1, kernel, stride[1], padding[1], tick_wait_start=3)
         self.linear1 = pb.DelayFullConn(
             self.conv2,
             out_feature,
             weights=weight,
+            bias=0,
             conn_type=pb.SynConnType.All2All,
             tick_wait_start=5
+        )
 
+class AvgPool2dSemiMap_Net(pb.DynSysGroup):
+    def __init__(self, shape, kernel_size, stride, weight):
+        super().__init__()
+        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
+        self.avgpool1 = pb.AvgPool2dSemiMap(self.i1, kernel_size, stride[0], tick_wait_start=1)
+        self.avgpool2 = pb.AvgPool2dSemiMap(self.avgpool1, kernel_size, stride[1], tick_wait_start=3)
+        self.linear1 = pb.DelayFullConn(
+            self.avgpool2,
+            2,
+            weights=weight,
+            bias=0,
+            conn_type=pb.SynConnType.All2All,
+            tick_wait_start=5
         )
 
+class Linear_Net(pb.DynSysGroup):
+    def __init__(self, shape, weight1):
+        super().__init__()
+        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
+        self.linear1 = pb.Linear(self.i1, 10, weights=weight1, bias=2, conn_type=pb.SynConnType.All2All)
+        self.probe1 = pb.Probe(self.linear1, "spike")
+
 class ANNNetwork(pb.Network):
     def __init__(self):
         super().__init__()

From 9d285c03ebbe94a1960eb75209fec637befaa85a Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Thu, 18 Jul 2024 22:02:45 +0800
Subject: [PATCH 032/187] add test cases for MaxPool2d_SemiMap

---
 hzy/hzy_test.py                     | 316 ----------------------------
 paibox/components/functional.py     |   2 +
 tests/components/test_functional.py |  56 +++--
 tests/components/utils.py           |  63 +++++-
 tests/shared_networks.py            |  14 +-
 5 files changed, 114 insertions(+), 337 deletions(-)
 delete mode 100644 hzy/hzy_test.py

diff --git a/hzy/hzy_test.py b/hzy/hzy_test.py
deleted file mode 100644
index 07d5fa97..00000000
--- a/hzy/hzy_test.py
+++ /dev/null
@@ -1,316 +0,0 @@
-
-
-import numpy as np
-import paibox as pb
-from paibox.components.functional import Conv_HalfRoll, Filter
-from paibox.components.synapses import Conv2dHalfRollSyn
-from paibox.components.synapses.conv_utils import _conv2d_halfroll
-from paibox.simulator.utils import _conv2d_faster_fp32
-
-
-weight1 = np.random.randint(0, 10, size=(32, 1, 5, 5), dtype=np.int8)
-weight2 = np.random.randint(0, 10, size=(32, 32, 2, 2), dtype=np.int8)
-weight3 = np.random.randint(0, 10, size=(64, 32, 5, 5), dtype=np.int8)
-weight4 = np.random.randint(0, 10, size=(64, 64, 2, 2), dtype=np.int8)
-
-class Conv2d_Net(pb.Network):
-    def __init__(self, Vthr1, Vthr2, Vthr3):
-        super().__init__()
-
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(1, 28, 28))
-        self.n1 = pb.IF((32, 24, 24), threshold=Vthr1, reset_v=0)
-        self.conv2d_1 = pb.Conv2d(self.i1, self.n1, kernel=weight1, stride=1)
-
-        self.n2 = pb.IF((32, 12, 12), threshold=Vthr2, reset_v=0, tick_wait_start=2)
-        self.conv2d_2 = pb.Conv2d(self.n1, self.n2, kernel=weight2, stride=2)
-
-        self.n3 = pb.IF((64, 8, 8), threshold=Vthr3, reset_v=0, tick_wait_start=3)
-        self.conv2d_3 = pb.Conv2d(self.n2, self.n3, kernel=weight3, stride=1)
-        self.n4 = pb.IF((64, 4, 4), threshold=Vthr3, reset_v=0, tick_wait_start=4)
-        self.conv2d_4 = pb.Conv2d(self.n3, self.n4, kernel=weight4, stride=2)
-        self.n5 = pb.IF((256,), threshold=Vthr3, reset_v=0, tick_wait_start=5)
-        self.fc1 = pb.FullConn(
-            self.n4, self.n5, weights=np.random.randint(0, 10, size=(1024, 256), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All
-        )
-        self.n6 = pb.IF((64,), threshold=Vthr3, reset_v=0, tick_wait_start=6)
-        self.fc2 = pb.FullConn(
-            self.n5, self.n6, weights=np.random.randint(0, 10, size=(256, 64), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All
-        )
-        self.n7 = pb.IF((10,), threshold=Vthr3, reset_v=0, tick_wait_start=7)
-        self.fc3 = pb.FullConn(
-            self.n6, self.n7, weights=np.random.randint(0, 10, size=(64, 10), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All
-        )
-
-        self.probe1 = pb.Probe(self.n3, "spike")
-
-
-
-input_data2 = np.array([1,0,1,0,1], dtype=np.bool_)
-class fcnet_4(pb.DynSysGroup):
-    def __init__(self):
-        super().__init__()
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(1, 28, 28))
-        #self.i1 = pb.InputProj(input=out_bypass1, shape_out=(1, 5))
-        self.n1 = pb.IF((1, 28), threshold=4, reset_v=0, name="n_1")
-        self.s0 = pb.FullConn(
-            self.i1,
-            self.n1,
-            weights=1,
-            conn_type=pb.SynConnType.All2All,
-        )
-        # self.probe1 = pb.Probe(self.n1, "spike")
-        self.n2 = pb.IF((32, 24, 24), threshold=0, reset_v=0, name="n_2")
-        #self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, np.array([[[[2,1,2],[1,2,1],[1,2,3]]]], dtype=np.int8), 1, tick_wait_start=1)
-        self.conv1 = pb.ConvHalfRoll(self.n1, self.n2, weight1, 1)
-        self.n3 = pb.IF((32, 12, 12), threshold=1, reset_v=0, name="n_3")
-        self.conv2 = pb.ConvHalfRoll(self.n2, self.n3, weight2, 2)
-        self.n4 = pb.IF((64, 8, 8), threshold=1, reset_v=0, name="n_4")
-        self.conv3 = pb.ConvHalfRoll(self.n3, self.n4, weight3, 1)
-        self.n5 = pb.IF((64, 4, 4), threshold=1, reset_v=0, name="n_5")
-        self.conv4 = pb.ConvHalfRoll(self.n4, self.n5, weight4, 2)
-        self.n6 = pb.IF((256,), threshold=1, reset_v=0, name="n_6")
-        self.linear1 = pb.DelayFullConn(
-            self.n5,
-            self.n6,
-            delay=4,
-            weights=np.random.randint(0, 10, size=(1024, 256), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.n7 = pb.IF((64,), threshold=1, reset_v=0, name="n_7")
-        self.linear2 = pb.FullConn(
-            self.n6,
-            self.n7,
-            weights=np.random.randint(0, 10, size=(256, 64), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.n8 = pb.IF((10,), threshold=1, reset_v=0, name="n_8")
-        self.linear2 = pb.FullConn(
-            self.n7,
-            self.n8,
-            weights=np.random.randint(0, 10, size=(64, 10), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.filter = pb.Filter(self.n8, 28)
-
-
-def out_bypass1(t, data1, *args, **kwargs):
-    return data1
-
-# input_data1 = np.array([[1,2,5,7,5],
-#                        [2,0,8,8,2],
-#                        [3,8,5,7,5],
-#                        [4,9,2,5,4],
-#                        [5,10,2,3,8],
-#                        [0,0,0,0,0],
-#                        [0,0,0,0,0],
-#                        [0,0,0,0,0],
-#                        [0,0,0,0,0],
-#                        [0,0,0,0,0]], dtype=np.int8)
-#
-# weight1 = np.array([[1,0],
-#  [0 ,1],
-#  [1 ,0],
-#  [0 ,1],
-#  [1 ,0],
-#  [0 ,1],
-#  [0 ,1],
-#  [0 ,0],
-#  [1 ,1]], dtype=np.int8)
-inpa = np.random.randint(0, 2, size=(1, 11, 11)).astype(np.int8)
-inpb = np.concatenate([inpa, np.zeros((1, 10, 11))], axis=1)
-weight = np.random.randint(0, 2, size=(3*3, 2), dtype=np.int8)
-class fcnet_5(pb.DynSysGroup):
-    def __init__(self):
-        super().__init__()
-        self.i1 = pb.InputProj(input=out_bypass1, shape_out=(1, 11))
-        self.conv1 = pb.ConvHalfRoll(self.i1, np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8), 2, 0, tick_wait_start=1)
-        self.conv2 = pb.ConvHalfRoll(self.conv1, np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8), 1, 0, tick_wait_start=3)
-        self.linear1 = pb.DelayFullConn(
-            self.conv2,
-            2,
-            weights=weight,
-            conn_type=pb.SynConnType.All2All,
-            tick_wait_start=5
-        )
-
-pb_net1 = fcnet_5()
-conv = pb_net1.conv2
-linear = pb_net1.linear1
-generated = pb.DynSysGroup.build_fmodule(pb_net1)
-
-sim1 = pb.Simulator(pb_net1, start_time_zero=False)
-
-
-probe_conv = pb.Probe(generated[conv][0], "output")
-probe_linear = pb.Probe(generated[linear][0], "output")
-sim1.add_probe(probe_conv)
-sim1.add_probe(probe_linear)
-for i in range(20):
-    pb.FRONTEND_ENV.save(data1=inpb[0][i])
-    sim1.run(1)
-    #print(pb_net1.nd_Delay_FullConn_0.output)
-    #sim2.run(1)
-for i in range(17):
-#     print(sim1.data[probe_conv][i])
-    print(sim1.data[probe_linear][i])
-data = np.array(sim1.data[probe_conv][8:15])
-print(data)
-#data = np.transpose(data, (1, 0))
-print(data)
-# output = data.ravel() @ weight
-# print(output)
-# output =_conv2d_faster_fp32(np.array([[[1,2,3,4,5],[2,0,8,9,10],[5,8,5,2,2],[7,8,7,5,3],[5,2,5,4,8]]]),
-#                             np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8),
-#                             (2,2),
-#                             (1,1))
-# output[output < 0] = 0
-# print(output)
-# #output = np.transpose(output, (0, 2, 1))
-#
-# #print(output.ravel() @ weight1)
-# output = _conv2d_faster_fp32(output, np.array([[[[2,1,2],[1,-2,1],[-1,2,-3]]]], dtype=np.int8),(2,2),(0,0))
-# output[output < 0] = 0
-# print(output)
-
-
-class deeplabv2(pb.DynSysGroup):
-    def __init__(self):
-        super().__init__()
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(3, 256))
-        self.n1 = pb.LIF((64, 254), threshold=0, reset_v=0, name="n_1")
-        self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, np.random.randint(0, 10, size=(64,3,3,3), dtype=np.int8), 1)
-        self.n2 = pb.LIF((64, 252), threshold=0, reset_v=0, name="n_2")
-        self.conv2 = pb.ConvHalfRoll(self.n1, self.n2, np.random.randint(0, 10, size=(64,64,3,3), dtype=np.int8), 1)
-        self.n3 = pb.LIF((64, 127), threshold=0, reset_v=0, name="n_3")
-        self.maxpool2d1 = pb.ConvHalfRoll(self.n2, self.n3, np.random.randint(0, 1, size=(64, 64, 3, 3), dtype=np.bool_), 2)
-        self.n4 = pb.LIF((128, 125), threshold=0, reset_v=0, name="n_4")
-        self.conv3 = pb.ConvHalfRoll(self.n3, self.n4, np.random.randint(0, 10, size=(128,64,3,3), dtype=np.int8), 1)
-        self.n5 = pb.LIF((128, 123), threshold=0, reset_v=0, name="n_5")
-        self.conv4 = pb.ConvHalfRoll(self.n4, self.n5, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
-        self.n6 = pb.LIF((128, 62), threshold=0, reset_v=0, name="n_6")
-        self.maxpool2d2 = pb.ConvHalfRoll(self.n5, self.n6, np.random.randint(0, 1, size=(128, 128, 3, 3), dtype=np.bool_), 2)
-        self.n7 = pb.LIF((128, 60), threshold=0, reset_v=0, name="n_7")
-        self.conv5 = pb.ConvHalfRoll(self.n6, self.n7, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
-        self.n8 = pb.LIF((2, 58), threshold=0, reset_v=0, name="n_8")
-        self.conv6 = pb.ConvHalfRoll(self.n7, self.n8, np.random.randint(0, 10, size=(2, 128, 3, 3), dtype=np.int8), 1)
-        self.n9 = pb.IF((116,), threshold=1, reset_v=0, name="n_9")
-        self.linear2 = pb.FullConn(
-            self.n8,
-            self.n9,
-            weights=np.random.randint(0, 1, size=(116, 116), dtype=np.bool_),
-            conn_type=pb.SynConnType.All2All,
-        )
-# w = np.array(   [[
-#                 [[2, 2, 2],[5,5,5],[9,9,9]],
-#                 [[1, 1, 1],[4,4,4],[7,7,7]],
-#                 ]]
-# )
-w = np.random.randint(1, 10, size=(1,1,3,3), dtype=np.int8)
-class deeplabv3(pb.DynSysGroup):
-    def __init__(self):
-        super().__init__()
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(1, 10))
-        self.n1 = pb.LIF((1, 10), threshold=0, reset_v=0, name="n_1")
-        self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, w, 1)
-
-
-        # self.n2 = pb.LIF((64, 28), threshold=0, reset_v=0, name="n_2")
-        # self.conv2 = pb.ConvHalfRoll(self.n1, self.n2, np.random.randint(0, 10, size=(64,64,3,3), dtype=np.int8), 1)
-        # self.n3 = pb.LIF((100, 24), threshold=0, reset_v=0, name="n_3")
-        # self.maxpool2d1 = pb.ConvHalfRoll(self.n1, self.n3, np.random.randint(0, 1, size=(100, 100, 3, 3), dtype=np.bool_), 2)
-        # #
-        # self.n4 = pb.LIF((8, 22), threshold=0, reset_v=0, name="n_4")
-        # self.conv3 = pb.ConvHalfRoll(self.n3, self.n4, np.random.randint(0, 10, size=(8,100,3,3), dtype=np.int8), 1)
-        # # # self.n5 = pb.LIF((128, 10), threshold=0, reset_v=0, name="n_5")
-        # # # self.conv4 = pb.ConvHalfRoll(self.n4, self.n5, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
-        # # # self.n6 = pb.LIF((128, 5), threshold=0, reset_v=0, name="n_6")
-        # # # self.maxpool2d2 = pb.ConvHalfRoll(self.n5, self.n6, np.random.randint(0, 1, size=(128, 128, 3, 3), dtype=np.bool_), 2)
-        # # # self.n7 = pb.LIF((128, 3), threshold=0, reset_v=0, name="n_7")
-        # # # self.conv5 = pb.ConvHalfRoll(self.n6, self.n7, np.random.randint(0, 10, size=(128, 128, 3, 3), dtype=np.int8), 1)
-        # self.n8 = pb.LIF((2, 251), threshold=0, reset_v=0, name="n_8")
-        # self.conv6 = pb.ConvHalfRoll(self.n4, self.n8, np.random.randint(0, 10, size=(2, 8, 3, 3), dtype=np.int8), 1)
-        # self.n9 = pb.IF((54,), threshold=1, reset_v=0, name="n_9")
-        # self.linear2 = pb.DelayFullConn(
-        #     self.n8,
-        #     self.n9,
-        #     delay=27,
-        #     weights=np.random.randint(0, 1, size=(2*27*27, 54), dtype=np.bool_),
-        #     conn_type=pb.SynConnType.All2All,
-        # )
-        # self.linear2 = pb.FullConn(
-        #     self.n8,
-        #     self.n9,
-        #     weights=np.random.randint(0, 1, size=(2 * 9, 10), dtype=np.bool_),
-        #     conn_type=pb.SynConnType.All2All,
-        # )
-class snn3(pb.DynSysGroup):
-    def __init__(self):
-        super().__init__()
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(1, 128))
-        self.n1 = pb.LIF((64, 128), threshold=0, reset_v=0, name="n_1")
-        self.conv1 = pb.ConvHalfRoll(self.i1, self.n1, np.random.randint(0,10, size=(64,1,3,3), dtype=np.int8), 1)
-        self.n3 = pb.LIF((64, 64), threshold=0, reset_v=0, name="n_3")
-        self.maxpool2d1 = pb.ConvHalfRoll(self.n1, self.n3, np.random.randint(0, 1, size=(64, 64, 2, 2), dtype=np.bool_), 2)
-        self.n4 = pb.LIF((64, 64), threshold=0, reset_v=0, name="n_4")
-        self.conv2 = pb.ConvHalfRoll(self.n3, self.n4, np.random.randint(0, 10, size=(64, 64, 3, 3), dtype=np.int8), 1)
-        self.n5 = pb.LIF((64, 32), threshold=0, reset_v=0, name="n_5")
-        self.maxpool2d2 = pb.ConvHalfRoll(self.n4, self.n5,
-                                          np.random.randint(0, 1, size=(64, 64, 2, 2), dtype=np.bool_), 2)
-        self.n6 = pb.LIF((64, 4, 4), threshold=0, reset_v=0, name="n_6")
-
-        self.linear1 = pb.DelayFullConn(
-            self.n5,
-            self.n6,
-            delay=32,
-            weights=np.random.randint(0, 10, size=(64*32*32, 64*4*4), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All,
-        )
-        self.n7 = pb.LIF((10,), threshold=0, reset_v=0, name="n_7")
-
-        self.linear2 = pb.FullConn(
-            self.n6,
-            self.n7,
-            weights=np.random.randint(0, 10, size=(64 * 4 * 4, 10), dtype=np.int8),
-            conn_type=pb.SynConnType.All2All,
-        )
-kernel = np.array([[[[1, 2, 3],
-                    [4, 5, 6],
-                    [7, 8, 9]]]], dtype=np.int8)
-class paddingnet(pb.DynSysGroup):
-    def __init__(self):
-        super().__init__()
-        pe = pb.simulator.PoissonEncoder()
-        self.i1 = pb.InputProj(input=pe, shape_out=(1, 4))
-        self.n1 = pb.IF((1, 4, 4), threshold=0, reset_v=0, name="n_1")
-        self.conv = pb.ConvHalfRoll(self.i1, self.n1, kernel, stride=1, padding=1)
-
-#pb_net.conv.build(pb_net, 3)
-#
-# pb.BACKEND_CONFIG.target_chip_addr = [(0, 0), (0, 1)]
-# mapper = pb.Mapper()
-# mapper.build(pb_net)
-#
-# graph_info = mapper.compile()
-# print("Core required:", graph_info["n_core_required"])
-# print("Core occupied:", graph_info["n_core_occupied"])
-
-
-
-# #print(graph_info["members"])
-# for k, v in graph_info["members"].items():
-#     for c, coreplm in v.items():
-#         print(c)
-#         for k, v in coreplm.neuron_configs.items():
-#             print(k.name,v)
-#             for n,s in k.master_nodes.items():
-#                 print(s.name)
-#                 # print(s.connectivity)
-
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index b1404765..bdb76eeb 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -17,6 +17,8 @@
     NeuOutType,
     VoltageType,
     WeightType,
+    DataArrayType,
+    LeakVType,
 )
 from paibox.utils import (
     arg_check_non_neg,
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 5db08dae..0aa9f532 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -8,6 +8,7 @@
 from paibox.network import DynSysGroup
 from paibox.simulator.utils import _conv2d_faster_fp32
 from paibox.utils import as_shape, shape2num, typical_round
+from tests.components.utils import max_pooling, avg_pooling
 
 
 def _assert_build_fmodule(
@@ -443,7 +444,7 @@ def test_SpikingPool2d(
         sim2.add_probe(probe_p2d)
 
         # Use binomial distribution to generate a sparse matrix with more zeros
-        inpa = np.random.binomial(1, p_binomial, size=(20,)).astype(np.bool_)
+        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
 
         for i in range(20):
             pb.FRONTEND_ENV.save(data1=inpa[i])
@@ -674,8 +675,11 @@ def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
             pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
             sim1.run(1)
         expected = _conv2d_faster_fp32(np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0]))
-        expected[expected < 0] = 0
-        expected = expected & (1 << 8) - 1
+        expected = np.array(expected, dtype=np.int32)
+        if (expected >> 8).all() > 0:
+            expected = np.full_like(expected, ((1 << 8) - 1))
+        else:
+            expected = expected & ((1 << 8) - 1)
         # print(expected)
         # print(sim1.data[probe_conv])
 
@@ -728,30 +732,52 @@ def test_Conv2dSemiMap_Net(self, shape, kernel, stride, padding, out_feature, we
         assert np.array_equal(expected, sim2.data[probe_linear][15])
 
     @pytest.mark.parametrize(
-        "shape, kernel_size, stride, weight",
+        "shape, kernel_size, stride, weight, pool_type",
         [
-            ((1, 8), (2, 2), [1, 1], np.random.randint(-5, 5, size=(6 * 6, 2), dtype=np.int8)),
-            ((1, 8), (2, 2), [2, 2], np.random.randint(-5, 5, size=(2 * 2, 2), dtype=np.int8)),
+            ((1, 8), (2, 2), [1, 1], np.random.randint(-5, 5, size=(6 * 6, 2), dtype=np.int8), "avg"),
+            ((1, 8), (2, 2), [2, 2], np.random.randint(-5, 5, size=(2 * 2, 2), dtype=np.int8), "avg"),
+            ((1, 8), (2, 2), [1, 1], np.random.randint(0, 5, size=(6 * 6, 2), dtype=np.int8), "max"),
+            ((1, 8), (2, 2), [2, 2], np.random.randint(0, 5, size=(2 * 2, 2), dtype=np.int8), "max"),
         ],
     )
-    def test_AvgPool2dSemiMap(self, shape, kernel_size, stride, weight):
-        from tests.shared_networks import AvgPool2dSemiMap_Net
-        net1 = AvgPool2dSemiMap_Net(shape, kernel_size, stride, weight)
-        avg = net1.avgpool2
+    def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
+        from tests.shared_networks import Pool2dSemiMap_Net
+        net1 = Pool2dSemiMap_Net(shape, kernel_size, stride, weight, pool_type)
+        pool = net1.pool2
         linear = net1.linear1
         generated = DynSysGroup.build_fmodule(net1)
         sim1 = pb.Simulator(net1, start_time_zero=False)
         probe_linear = pb.Probe(generated[linear][0], "output")
-        probe_avg = pb.Probe(generated[avg][0], "output")
-        sim1.add_probe(probe_avg)
+        probe_pool = pb.Probe(generated[pool][0], "output")
+        sim1.add_probe(probe_pool)
         sim1.add_probe(probe_linear)
         inpa = np.random.randint(0, 10, size=(1, 8, 8)).astype(np.int8)
         inpb = np.concatenate([inpa, np.zeros((1, 10, 8))], axis=1)
-        for i in range(12):
+        for i in range(13):
             pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
             sim1.run(1)
-
-        #print(sim1.data[probe_avg])
+        if pool_type == "max":
+            expected = max_pooling(np.transpose(inpa, (0, 2, 1)), kernel_size, stride)
+            expected = max_pooling(expected, kernel_size, stride)
+            expected = np.array(expected, dtype=np.int32)
+            expected = expected.ravel() @ weight
+            if (expected >> 8).all() > 0:
+                expected = np.full_like(expected, ((1 << 8) - 1))
+            else:
+                expected = expected & ((1 << 8) - 1)
+            assert np.array_equal(expected, sim1.data[probe_linear][12])
+        else:
+            expected = avg_pooling(np.transpose(inpa, (0, 2, 1)), kernel_size, stride)
+            expected = avg_pooling(expected, kernel_size, stride)
+            expected = np.array(expected, dtype=np.int32)
+            expected = expected.ravel() @ weight
+            expected[expected < 0] = 0
+            if (expected >> 8).all() > 0:
+                expected = np.full_like(expected, ((1 << 8) - 1))
+            else:
+                expected = expected & ((1 << 8) - 1)
+            assert np.array_equal(expected, sim1.data[probe_linear][12])
+        #print(sim1.data[probe_pool])
         #print(sim1.data[probe_linear])
 
     @pytest.mark.parametrize(
diff --git a/tests/components/utils.py b/tests/components/utils.py
index 134510a2..9c40b871 100644
--- a/tests/components/utils.py
+++ b/tests/components/utils.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from paibox.types import SpikeType
+from paibox.types import SpikeType, NeuOutType
 
 
 def maxpool2d_golden(
@@ -83,3 +83,64 @@ def avgpool2d_golden(
                 )
 
     return out >= threshold
+
+
+def max_pooling(input_data, kernel_size: tuple[int, int], stride:tuple[int, int],) -> NeuOutType:
+    """
+    Reference max pooling over a 3-D array, with no padding.
+
+    Args:
+    input_data (numpy.ndarray): input data of shape (channels, height, width)
+    kernel_size (tuple[int, int]): pooling window as (kernel_height, kernel_width)
+    stride (tuple[int, int]): window step as (stride_height, stride_width)
+
+    Returns:
+    numpy.ndarray: float64 output of shape (channels, new_height, new_width)
+    """
+    channels, height, width = input_data.shape
+    new_height = (height - kernel_size[0]) // stride[0] + 1
+    new_width = (width - kernel_size[1]) // stride[1] + 1
+
+    output_data = np.zeros((channels, new_height, new_width))
+
+    for c in range(channels):
+        for i in range(new_height):
+            for j in range(new_width):
+                x1 = i * stride[0]
+                y1 = j * stride[1]
+                x2 = x1 + kernel_size[0]
+                y2 = y1 + kernel_size[1]
+                output_data[c, i, j] = np.max(input_data[c, x1:x2, y1:y2])
+
+    return output_data
+
+
+def avg_pooling(input_data, kernel_size: tuple[int, int], stride:tuple[int, int],) -> NeuOutType:
+    """
+    Reference shift-based average pooling over a 3-D integer array, with no padding.
+
+    Args:
+    input_data (numpy.ndarray): integer input data of shape (channels, height, width)
+    kernel_size (tuple[int, int]): pooling window as (kernel_height, kernel_width)
+    stride (tuple[int, int]): window step as (stride_height, stride_width)
+
+    Returns:
+    numpy.ndarray: int32 output of shape (channels, new_height, new_width); each window sum is right-shifted by floor(log2(kernel area)), so it is the exact floor mean only for power-of-two areas
+    """
+    channels, height, width = input_data.shape
+    kernel_height, kernel_width = kernel_size
+    new_height = (height - kernel_size[0]) // stride[0] + 1
+    new_width = (width - kernel_size[1]) // stride[1] + 1
+
+    output_data = np.zeros((channels, new_height, new_width), dtype=np.int32)
+
+    for c in range(channels):
+        for i in range(new_height):
+            for j in range(new_width):
+                x1 = i * stride[0]
+                y1 = j * stride[1]
+                x2 = x1 + kernel_size[0]
+                y2 = y1 + kernel_size[1]
+                output_data[c, i, j] = np.sum(input_data[c, x1:x2, y1:y2]) >> ((kernel_height * kernel_width).bit_length()-1)
+
+    return output_data
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index d37bfc37..36291107 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -241,14 +241,18 @@ def __init__(self, shape, kernel, stride, padding, out_feature, weight):
             tick_wait_start=5
         )
 
-class AvgPool2dSemiMap_Net(pb.DynSysGroup):
-    def __init__(self, shape, kernel_size, stride, weight):
+class Pool2dSemiMap_Net(pb.DynSysGroup):
+    def __init__(self, shape, kernel_size, stride, weight, pool_type):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.avgpool1 = pb.AvgPool2dSemiMap(self.i1, kernel_size, stride[0], tick_wait_start=1)
-        self.avgpool2 = pb.AvgPool2dSemiMap(self.avgpool1, kernel_size, stride[1], tick_wait_start=3)
+        if pool_type == "avg":
+            self.pool1 = pb.AvgPool2dSemiMap(self.i1, kernel_size, stride[0], tick_wait_start=1)
+            self.pool2 = pb.AvgPool2dSemiMap(self.pool1, kernel_size, stride[1], tick_wait_start=3)
+        else:
+            self.pool1 = pb.MaxPool2dSemiMap(self.i1, kernel_size, stride[0], tick_wait_start=1)
+            self.pool2 = pb.MaxPool2dSemiMap(self.pool1, kernel_size, stride[1], tick_wait_start=3)
         self.linear1 = pb.DelayFullConn(
-            self.avgpool2,
+            self.pool2,
             2,
             weights=weight,
             bias=0,

From cf0f268ed1f077209c1020188091aa6e3ca85ed6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 22 Jul 2024 18:04:07 +0000
Subject: [PATCH 033/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/__init__.py                       |  12 +-
 paibox/components/functional.py          | 454 ++++++++++++-----------
 paibox/components/neuron/base.py         |   9 +-
 paibox/components/synapses/__init__.py   |   2 +-
 paibox/components/synapses/base.py       |  37 +-
 paibox/components/synapses/conv_utils.py | 286 +++++++-------
 paibox/components/synapses/synapses.py   |  77 ++--
 paibox/components/synapses/transforms.py |   5 +-
 paibox/mixin.py                          |   1 -
 paibox/network.py                        |  13 +-
 tests/components/test_functional.py      | 167 ++++++---
 tests/components/utils.py                |  18 +-
 tests/shared_networks.py                 |  41 +-
 13 files changed, 632 insertions(+), 490 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index bdd2d29a..ba9fd9a1 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -2,11 +2,17 @@
 
 from .backend import BACKEND_CONFIG as BACKEND_CONFIG
 from .backend import Mapper as Mapper
+from .components.functional import AvgPool2dSemiMap as AvgPool2dSemiMap
 from .components.functional import BitwiseAND as BitwiseAND
 from .components.functional import BitwiseNOT as BitwiseNOT
 from .components.functional import BitwiseOR as BitwiseOR
 from .components.functional import BitwiseXOR as BitwiseXOR
+from .components.functional import Conv2dSemiMap as Conv2dSemiMap
+from .components.functional import Delay_FullConn as DelayFullConn
 from .components.functional import DelayChain as DelayChain
+from .components.functional import Filter as Filter
+from .components.functional import Linear as Linear
+from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
 from .components.functional import SpikingAdd as SpikingAdd
 from .components.functional import SpikingAvgPool2d as SpikingAvgPool2d
 from .components.functional import SpikingAvgPool2dWithV as SpikingAvgPool2dWithV
@@ -14,12 +20,6 @@
 from .components.functional import SpikingSub as SpikingSub
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
-from .components.functional import Conv2dSemiMap as Conv2dSemiMap
-from .components.functional import Filter as Filter
-from .components.functional import Delay_FullConn as DelayFullConn
-from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
-from .components.functional import AvgPool2dSemiMap as AvgPool2dSemiMap
-from .components.functional import Linear as Linear
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index bdb76eeb..f04719a9 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -2,7 +2,7 @@
 import sys
 from collections.abc import Sequence
 from functools import partial
-from typing import Literal, Optional, Union, ClassVar
+from typing import ClassVar, Literal, Optional, Union
 
 import numpy as np
 from paicorelib import NTM, RM, TM
@@ -13,12 +13,12 @@
 from paibox.types import (
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
+    DataArrayType,
     IntScalarType,
+    LeakVType,
     NeuOutType,
     VoltageType,
     WeightType,
-    DataArrayType,
-    LeakVType,
 )
 from paibox.utils import (
     arg_check_non_neg,
@@ -41,12 +41,11 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, FullConnSyn, Conv2dHalfRollSyn, MaxPool2dSemiMapSyn
+from .synapses import ConnType, Conv2dHalfRollSyn, FullConnSyn, MaxPool2dSemiMapSyn
 from .synapses.conv_types import _Size2Type
 from .synapses.conv_utils import _fm_ndim2_check, _pair
 from .synapses.transforms import Conv2dForward, _Pool2dForward
 
-
 if sys.version_info >= (3, 13):
     from warnings import deprecated
 else:
@@ -79,13 +78,13 @@ class BitwiseAND(FunctionalModule2to1):
     inherent_delay = 0
 
     def __init__(
-            self,
-            neuron_a: Union[NeuDyn, InputProj],
-            neuron_b: Union[NeuDyn, InputProj],
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_a: Union[NeuDyn, InputProj],
+        neuron_b: Union[NeuDyn, InputProj],
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Bitwise AND module. Do a bitwise AND of the output spike of two neurons & output.
 
@@ -149,12 +148,12 @@ class BitwiseNOT(FunctionalModule):
     inherent_delay = 0
 
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Bitwise NOT module. Do a bitwise NOT of the output spike of one neuron & output.
 
@@ -211,13 +210,13 @@ class BitwiseOR(FunctionalModule2to1):
     inherent_delay = 0
 
     def __init__(
-            self,
-            neuron_a: Union[NeuDyn, InputProj],
-            neuron_b: Union[NeuDyn, InputProj],
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_a: Union[NeuDyn, InputProj],
+        neuron_b: Union[NeuDyn, InputProj],
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Bitwise OR module. Do a bitwise OR of the output spike of two neurons & output.
 
@@ -268,13 +267,13 @@ class BitwiseXOR(FunctionalModule2to1):
     inherent_delay = 1
 
     def __init__(
-            self,
-            neuron_a: Union[NeuDyn, InputProj],
-            neuron_b: Union[NeuDyn, InputProj],
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_a: Union[NeuDyn, InputProj],
+        neuron_b: Union[NeuDyn, InputProj],
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Bitwise XOR module. Do a bitwise XOR of the output spike of two neurons & output.
 
@@ -348,13 +347,13 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 class DelayChain(FunctionalModule):
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            chain_level: int = 1,
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        chain_level: int = 1,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Delay chain. It will add extra neurons (and identity synapses) as buffer.
 
@@ -443,18 +442,18 @@ class SpikingAdd(FunctionalModule2to1WithV):
     inherent_delay = 0
 
     def __init__(
-            self,
-            neuron_a: Union[NeuDyn, InputProj],
-            neuron_b: Union[NeuDyn, InputProj],
-            factor_a: IntScalarType = 1,
-            factor_b: IntScalarType = 1,
-            pos_thres: IntScalarType = 1,
-            reset_v: Optional[int] = None,
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            overflow_strict: bool = False,
-            **kwargs,
+        self,
+        neuron_a: Union[NeuDyn, InputProj],
+        neuron_b: Union[NeuDyn, InputProj],
+        factor_a: IntScalarType = 1,
+        factor_b: IntScalarType = 1,
+        pos_thres: IntScalarType = 1,
+        reset_v: Optional[int] = None,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        overflow_strict: bool = False,
+        **kwargs,
     ) -> None:
         """Spiking Addition module. The result will be reflected in time dimension.
 
@@ -484,7 +483,7 @@ def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageTyp
         return _spike_func_sadd_ssub(vjt, self.pos_threshold, self.reset_v)
 
     def synaptic_integr(
-            self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
+        self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
     ) -> VoltageType:
         return _sum_inputs_sadd_ssub(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
@@ -528,15 +527,15 @@ class _SpikingPool2dWithV(FunctionalModuleWithV):
     inherent_delay = 0
 
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            stride: Optional[_Size2Type] = None,
-            padding: _Size2Type = 0,
-            pos_thres: Optional[int] = None,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        pos_thres: Optional[int] = None,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Basic 2d spiking pooling."""
         # C,H,W
@@ -607,17 +606,17 @@ class _SpikingPool2d(FunctionalModule):
     inherent_delay = 0
 
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            pool_type: Literal["avg", "max"],
-            stride: Optional[_Size2Type] = None,
-            padding: _Size2Type = 0,
-            threshold: Optional[int] = None,
-            # fm_order: _Order3d = "CHW",
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        pool_type: Literal["avg", "max"],
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        threshold: Optional[int] = None,
+        # fm_order: _Order3d = "CHW",
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """Basic 2d spiking pooling."""
         if pool_type not in ("avg", "max"):
@@ -693,17 +692,17 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 class SpikingAvgPool2d(_SpikingPool2d):
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            stride: Optional[_Size2Type] = None,
-            padding: _Size2Type = 0,
-            threshold: Optional[int] = None,
-            # fm_order: _Order3d = "CHW",
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        threshold: Optional[int] = None,
+        # fm_order: _Order3d = "CHW",
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """2d average pooling for spike. The input feature map is in 'CHW' order by default.
 
@@ -733,16 +732,16 @@ def __init__(
 
 class SpikingAvgPool2dWithV(_SpikingPool2dWithV):
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            stride: Optional[_Size2Type] = None,
-            padding: _Size2Type = 0,
-            threshold: Optional[int] = None,
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        threshold: Optional[int] = None,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         super().__init__(
             neuron, kernel_size, stride, padding, threshold, keep_shape, name, **kwargs
@@ -758,16 +757,16 @@ class SpikingMaxPool2d(_SpikingPool2d):
     """
 
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            stride: Optional[_Size2Type] = None,
-            padding: _Size2Type = 0,
-            # fm_order: _Order3d = "CHW",
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        # fm_order: _Order3d = "CHW",
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """2d max pooling for spike.
 
@@ -800,14 +799,14 @@ class SpikingSub(FunctionalModule2to1WithV):
     pos_threshold: int = 1
 
     def __init__(
-            self,
-            neuron_a: Union[NeuDyn, InputProj],
-            neuron_b: Union[NeuDyn, InputProj],
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            overflow_strict: bool = False,
-            **kwargs,
+        self,
+        neuron_a: Union[NeuDyn, InputProj],
+        neuron_b: Union[NeuDyn, InputProj],
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        overflow_strict: bool = False,
+        **kwargs,
     ) -> None:
         """Spiking subtraction module. The result will be reflected in time dimension.
 
@@ -827,7 +826,7 @@ def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageTyp
         return _spike_func_sadd_ssub(vjt, self.pos_threshold)
 
     def synaptic_integr(
-            self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
+        self, x1: NeuOutType, x2: NeuOutType, vjt_pre: VoltageType
     ) -> VoltageType:
         return _sum_inputs_sadd_ssub(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
@@ -874,12 +873,12 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 @set_rt_mode(1, 1, 1)
 class Transpose2d(TransposeModule):
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """2d transpose module.
 
@@ -933,13 +932,13 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 @set_rt_mode(1, 1, 1)
 class Transpose3d(TransposeModule):
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            axes: Optional[Sequence[int]] = None,
-            *,
-            keep_shape: bool = True,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        axes: Optional[Sequence[int]] = None,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         """3d transpose module.
 
@@ -995,15 +994,15 @@ class Delay_FullConn(FunctionalModule):
     "That operator is used on the first fully connected layer after the semimap-convolution."
 
     def __init__(
-            self,
-            neuron_s: Union[NeuDyn, InputProj],
-            out_feature: tuple[int, ...],
-            weights: DataArrayType = 1,
-            bias: Union[int, LeakVType] = 0,
-            conn_type: ConnType = ConnType.MatConn,
-            keep_shape: bool = False,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_s: Union[NeuDyn, InputProj],
+        out_feature: tuple[int, ...],
+        weights: DataArrayType = 1,
+        bias: Union[int, LeakVType] = 0,
+        conn_type: ConnType = ConnType.MatConn,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         # self.delay =
         self.weights = weights
@@ -1023,9 +1022,13 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         output = x1 @ self.weights
         return output
 
-    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+    def build(
+        self, network: DynSysGroup, delay: int, **build_options
+    ) -> BuiltComponentType:
         if len(self.module_intf.operands[0].shape_out) != 2:
-            raise ShapeError("The source node must be a successor to the half-convolution")
+            raise ShapeError(
+                "The source node must be a successor to the half-convolution"
+            )
         delay_shape = self.module_intf.operands[0].shape_out
         delay_neurons = []
         neuron_d = Neuron(
@@ -1068,7 +1071,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
                 name=f"s{i}_delay",
             )
             # w = np.zeros((neuron.num_out, self.module_intf.operands[1].num_out))
-            w = self.weights[delay_shape[1] - i - 1::delay_shape[1], :]
+            w = self.weights[delay_shape[1] - i - 1 :: delay_shape[1], :]
             syn2 = FullConnSyn(  # cin,(kw-1)*ih -> cout * oh
                 delay_neurons[i],  # 54 -> 54
                 neuron_d,
@@ -1088,20 +1091,18 @@ class Conv2dSemiMap(FunctionalModule):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
-            self,
-            neuron_s: Union[NeuDyn, InputProj],
-            # neuron_d: Union[NeuDyn, InputProj],
-            kernel: np.ndarray,
-            stride: Optional[_Size2Type] = None,
-            padding: _Size2Type = 0,
-            bias: Union[int, LeakVType] = 0,
-            keep_shape: bool = False,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_s: Union[NeuDyn, InputProj],
+        # neuron_d: Union[NeuDyn, InputProj],
+        kernel: np.ndarray,
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        bias: Union[int, LeakVType] = 0,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
-        """2d conv_semimap for ANN mode.
-
-        """
+        """2d conv_semimap for ANN mode."""
         self.kernel = kernel
         self.stride = _pair(stride)
         self.padding = _pair(padding)
@@ -1116,7 +1117,10 @@ def __init__(
         #     in_ch, in_h, in_w = _fm_ndim2_check(neuron_s.shape_out, "CHW")
         #     neuron_s.shape_change((in_ch, in_h))
         else:
-            in_ch, in_h, = neuron_s.shape_out
+            (
+                in_ch,
+                in_h,
+            ) = neuron_s.shape_out
         cout, cin, kh, kw = kernel.shape
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
         if in_ch != cin:
@@ -1138,12 +1142,16 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         # print(x1)
         # output = _conv2d_faster_fp32(x1, self.kernel, self.stride, self.padding)
         # output[output < 0] = 0
-        return #output
+        return  # output
 
-    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+    def build(
+        self, network: DynSysGroup, delay: int, **build_options
+    ) -> BuiltComponentType:
         # print("进入build")
         if len(self.module_intf.operands[0].shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
+            in_ch, in_h, in_w = _fm_ndim2_check(
+                self.module_intf.operands[0].shape_out, "CHW"
+            )
             self.module_intf.operands[0].shape_change((in_ch, in_h))
         in_ch, in_h = self.module_intf.operands[0].shape_out
         cout, cin, kh, kw = self.kernel.shape
@@ -1205,6 +1213,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
 
         return generated
 
+
 @deprecated(
     "The backend currently does not support 'Filter', please use it in a future version",
     category=PAIBoxDeprecationWarning,
@@ -1213,15 +1222,14 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
 class Filter(FunctionalModule):
 
     def __init__(
-            self,
-            neuron: Union[NeuDyn, InputProj],
-            time_to_fire: int,
-            keep_shape: bool = False,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        time_to_fire: int,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
-        """
-        """
+        """ """
         shape_out = neuron.shape_out
         self.time_to_fire = time_to_fire
         self.cur_time = 0
@@ -1254,7 +1262,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             spike_width=self.spike_width,
             snn_en=self.snn_en,
             keep_shape=self.keep_shape,
-            name="filter"
+            name="filter",
         )
 
         syn1 = FullConnSyn(
@@ -1280,16 +1288,17 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 @set_rt_mode(8, 8, 0)
 class Linear(FunctionalModule):
     "FullConn for ANN mode"
+
     def __init__(
-            self,
-            neuron_s: Union[NeuDyn, InputProj],
-            out_feature: tuple[int, ...],
-            weights: DataArrayType = 1,
-            bias: Union[int, LeakVType] = 0,
-            conn_type: ConnType = ConnType.MatConn,
-            keep_shape: bool = False,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_s: Union[NeuDyn, InputProj],
+        out_feature: tuple[int, ...],
+        weights: DataArrayType = 1,
+        bias: Union[int, LeakVType] = 0,
+        conn_type: ConnType = ConnType.MatConn,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
         self.weights = weights
         self.conn_type = conn_type
@@ -1342,21 +1351,20 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
 @set_rt_mode(8, 8, 0)
 class MaxPool2dSemiMap(FunctionalModule):
     _spatial_ndim: ClassVar[int] = 2
+
     def __init__(
-            self,
-            neuron_s: Union[NeuDyn, InputProj],
-            # neuron_d: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            stride: Optional[_Size2Type] = None,
-            # padding: _Size2Type = 0,
-            # bias: Union[int, LeakVType] = 0,
-            keep_shape: bool = False,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_s: Union[NeuDyn, InputProj],
+        # neuron_d: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        # padding: _Size2Type = 0,
+        # bias: Union[int, LeakVType] = 0,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
-        """2d Pool2d_semimap for spike.
-
-        """
+        """2d Pool2d_semimap for spike."""
         self.kernel_size = kernel_size
         self.stride = _pair(stride)
         self.pool_max = True
@@ -1366,7 +1374,10 @@ def __init__(
         if len(neuron_s.shape_out) != 2:
             in_ch, in_h, in_w = neuron_s.shape_out
         else:
-            in_ch, in_h, = neuron_s.shape_out
+            (
+                in_ch,
+                in_h,
+            ) = neuron_s.shape_out
         cout = cin = in_ch
         out_h = (in_h - kernel_size[0]) // self.stride[0] + 1
         if in_ch != cin:
@@ -1387,10 +1398,14 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         print("进入pool2d_func")
         return
 
-    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+    def build(
+        self, network: DynSysGroup, delay: int, **build_options
+    ) -> BuiltComponentType:
         # print("进入build")
         if len(self.module_intf.operands[0].shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
+            in_ch, in_h, in_w = _fm_ndim2_check(
+                self.module_intf.operands[0].shape_out, "CHW"
+            )
             self.module_intf.operands[0].shape_change((in_ch, in_h))
         in_ch, in_h = self.module_intf.operands[0].shape_out
         cout = cin = in_ch
@@ -1441,7 +1456,9 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
             syn2 = MaxPool2dSemiMapSyn(
                 n_delays[i],
                 pool2d,
-                weights=_pool2d_semimap((cin, in_h), self.shape_out, self.kernel_size, self.stride),
+                weights=_pool2d_semimap(
+                    (cin, in_h), self.shape_out, self.kernel_size, self.stride
+                ),
                 name=f"s{i}_{self.name}",
             )
             s_delays.append(syn2)
@@ -1457,20 +1474,18 @@ class AvgPool2dSemiMap(FunctionalModule):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
-            self,
-            neuron_s: Union[NeuDyn, InputProj],
-            # neuron_d: Union[NeuDyn, InputProj],
-            kernel_size: _Size2Type,
-            stride: Optional[_Size2Type] = None,
-            # padding: _Size2Type = 0,
-            # bias: Union[int, LeakVType] = 0,
-            keep_shape: bool = False,
-            name: Optional[str] = None,
-            **kwargs,
+        self,
+        neuron_s: Union[NeuDyn, InputProj],
+        # neuron_d: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        # padding: _Size2Type = 0,
+        # bias: Union[int, LeakVType] = 0,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
     ) -> None:
-        """2d AvgPool2d_semimap for spike.
-
-        """
+        """2d AvgPool2d_semimap for spike."""
         self.kernel_size = kernel_size
         self.stride = _pair(stride)
         # self.padding = _pair(padding)
@@ -1479,7 +1494,10 @@ def __init__(
         if len(neuron_s.shape_out) != 2:
             in_ch, in_h, in_w = neuron_s.shape_out
         else:
-            in_ch, in_h, = neuron_s.shape_out
+            (
+                in_ch,
+                in_h,
+            ) = neuron_s.shape_out
         cout = cin = in_ch
         out_h = (in_h - kernel_size[0]) // self.stride[0] + 1
         if in_ch != cin:
@@ -1500,10 +1518,14 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         print("进入pool2d_func")
         return
 
-    def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltComponentType:
+    def build(
+        self, network: DynSysGroup, delay: int, **build_options
+    ) -> BuiltComponentType:
         # print("进入build")
         if len(self.module_intf.operands[0].shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(self.module_intf.operands[0].shape_out, "CHW")
+            in_ch, in_h, in_w = _fm_ndim2_check(
+                self.module_intf.operands[0].shape_out, "CHW"
+            )
             self.module_intf.operands[0].shape_change((in_ch, in_h))
         in_ch, in_h = self.module_intf.operands[0].shape_out
         cout = cin = in_ch
@@ -1555,7 +1577,9 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
             syn2 = FullConnSyn(
                 n_delays[i],
                 pool2d,
-                weights=_pool2d_semimap((cin, in_h), self.shape_out, self.kernel_size, self.stride),
+                weights=_pool2d_semimap(
+                    (cin, in_h), self.shape_out, self.kernel_size, self.stride
+                ),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
@@ -1568,7 +1592,7 @@ def build(self, network: DynSysGroup, delay: int, **build_options) -> BuiltCompo
 
 
 def _spike_func_sadd_ssub(
-        vjt: VoltageType, pos_thres: int, reset_v: Optional[int] = None
+    vjt: VoltageType, pos_thres: int, reset_v: Optional[int] = None
 ) -> tuple[NeuOutType, VoltageType]:
     """Function `spike_func()` in spiking addition & subtraction."""
     # Fire
@@ -1590,7 +1614,7 @@ def _spike_func_sadd_ssub(
 
 
 def _spike_func_avg_pool(
-        vjt: VoltageType, pos_thres: int
+    vjt: VoltageType, pos_thres: int
 ) -> tuple[NeuOutType, VoltageType]:
     """Function `spike_func()` in spiking addition & subtraction."""
     # Fire
@@ -1607,7 +1631,7 @@ def _spike_func_avg_pool(
 
 
 def _sum_inputs_sadd_ssub(
-        x1: NeuOutType, x2: NeuOutType, f1: int, f2: int, vjt_pre: VoltageType, strict: bool
+    x1: NeuOutType, x2: NeuOutType, f1: int, f2: int, vjt_pre: VoltageType, strict: bool
 ) -> VoltageType:
     """Function `sum_input()` for spiking addition & subtraction."""
     incoming_v = (vjt_pre + x1 * f1 + x2 * f2).astype(VOLTAGE_DTYPE)
@@ -1645,7 +1669,7 @@ def _transpose2d_mapping(op_shape: tuple[int, ...]) -> WeightType:
 
 
 def _transpose3d_mapping(
-        op_shape: tuple[int, ...], axes: tuple[int, ...]
+    op_shape: tuple[int, ...], axes: tuple[int, ...]
 ) -> WeightType:
     """Get the mapping matrix for transpose of 3d array.
 
@@ -1684,10 +1708,10 @@ def _delay_mapping(h: int, cin: int, n: int) -> WeightType:
 
 
 def _pool2d_semimap(
-        in_shape: _Size2Type,
-        out_shape: _Size2Type,
-        kernel_size: WeightType,
-        stride: _Size2Type,
+    in_shape: _Size2Type,
+    out_shape: _Size2Type,
+    kernel_size: WeightType,
+    stride: _Size2Type,
 ) -> WeightType:
     cout = cin = in_shape[0]
     kh, kw = kernel_size
@@ -1697,5 +1721,5 @@ def _pool2d_semimap(
     for i in range(cout):
         for j in range(cin):
             for k in range(oh):
-                mt[j * ih + k * stride[1]:j * ih + k * stride[1] + kh, i * oh + k] = 1
+                mt[j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k] = 1
     return mt
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index e99068d5..180dfcd2 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -14,9 +14,9 @@
     CoreMode,
     HwConfig,
     InputWidthFormat,
+    MaxPoolingEnable,
     SNNModeEnable,
     SpikeWidthFormat,
-    MaxPoolingEnable,
     get_core_mode,
 )
 
@@ -329,6 +329,7 @@ def _truncate() -> VoltageType:
                 return (vj << (8 - self.bit_truncation)) & _mask(8)
             else:
                 return (vj >> (self.bit_truncation - 8)) & _mask(8)
+
         v_truncated = np.where(
             self.thres_mode == TM.EXCEED_POSITIVE, _truncate(), self._vjt0
         )
@@ -591,7 +592,7 @@ def __getitem__(self, index) -> "NeuronSubView":
         return NeuronSubView(self, index)
 
     def shape_change(self, new_shape: Shape) -> None:
-        #print(self.name,"shape change")
+        # print(self.name,"shape change")
         self._n_neuron = shape2num(new_shape)
         self._shape = as_shape(new_shape)
         self._vjt = self.init_param(0).astype(np.int32)
@@ -603,8 +604,8 @@ def shape_change(self, new_shape: Shape) -> None:
         self.y = self.init_param(0).astype(np.int32)
         self.set_reset_value("y", self.y)
         self.delay_registers = np.zeros(
-                (HwConfig.N_TIMESLOT_MAX,) + self._inner_spike.shape, dtype=np.bool_
-            )
+            (HwConfig.N_TIMESLOT_MAX,) + self._inner_spike.shape, dtype=np.bool_
+        )
         self.set_reset_value("delay_registers", self.delay_registers)
 
         return
diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index 0ae6e3e9..61177dfc 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,2 +1,2 @@
-from .base import FullConnectedSyn, FullConnSyn, Conv2dHalfRollSyn, MaxPool2dSemiMapSyn
+from .base import Conv2dHalfRollSyn, FullConnectedSyn, FullConnSyn, MaxPool2dSemiMapSyn
 from .transforms import ConnType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 8600824d..c95260f6 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -328,21 +328,20 @@ def __init__(
         )
 
 
-
 class Conv2dHalfRollSyn(FullConnectedSyn):
 
     def __init__(
-            self,
-            source: Union[NeuDyn, InputProj],
-            dest: Neuron,
-            kernel: np.ndarray,
-            stride: tuple[int, int],
-            padding: tuple[int, int],
-            order: _KOrder4d = "OIHW",
-            name: Optional[str] = None,
+        self,
+        source: Union[NeuDyn, InputProj],
+        dest: Neuron,
+        kernel: np.ndarray,
+        stride: tuple[int, int],
+        padding: tuple[int, int],
+        order: _KOrder4d = "OIHW",
+        name: Optional[str] = None,
     ) -> None:
         super().__init__(source, dest, name)
-        #print("进入halfroll")
+        # print("进入halfroll")
         if order == "IOHW":
             _kernel = np.swapaxes(kernel, 0, 1)
         else:
@@ -360,7 +359,10 @@ def __init__(
         if in_ch != in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
-        self.comm = Conv2dHalfForward((in_ch, in_h), (out_channels, out_h), _kernel, stride, padding)
+        self.comm = Conv2dHalfForward(
+            (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding
+        )
+
 
 class ConvTranspose1dSyn(FullConnectedSyn):
     _spatial_ndim: ClassVar[int] = 1
@@ -468,14 +470,15 @@ def __init__(
             (in_h, in_w), (out_h, out_w), _kernel, stride, padding, output_padding
         )
 
+
 class MaxPool2dSemiMapSyn(FullConnectedSyn):
 
     def __init__(
-            self,
-            source: Union[NeuDyn, InputProj],
-            dest: Neuron,
-            weights: DataArrayType = 1,
-            name: Optional[str] = None,
+        self,
+        source: Union[NeuDyn, InputProj],
+        dest: Neuron,
+        weights: DataArrayType = 1,
+        name: Optional[str] = None,
     ) -> None:
         super().__init__(source, dest, name)
-        self.comm = _CompareMax((self.num_in, self.num_out), weights)
\ No newline at end of file
+        self.comm = _CompareMax((self.num_in, self.num_out), weights)
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 61594193..021220d4 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -61,11 +61,11 @@ def _fm_ndim2_check(fm_shape: SizeAnyType, fm_order: _Order3d) -> Size3Type:
 
 
 def _conv1d_unroll(
-        in_shape: Size1Type,
-        out_shape: Size1Type,
-        kernel: WeightType,
-        stride: Size1Type,
-        padding: Size1Type,
+    in_shape: Size1Type,
+    out_shape: Size1Type,
+    kernel: WeightType,
+    stride: Size1Type,
+    padding: Size1Type,
 ) -> WeightType:
     """Unroll the kernel of 1d convolution into a matrix."""
     cout, cin, kl = kernel.shape
@@ -80,9 +80,9 @@ def _conv1d_unroll(
         for ch_idx in np.ndindex(kernel.shape[:2]):
             # [0] -> o_ch, [1] -> i_ch
             zeros_image[
-            i * stride[0] + ch_idx[1] * il: i * stride[0] + ch_idx[1] * il + kl,
-            ch_idx[0],
-            i,
+                i * stride[0] + ch_idx[1] * il : i * stride[0] + ch_idx[1] * il + kl,
+                ch_idx[0],
+                i,
             ] = kernel[ch_idx[0], ch_idx[1], :]
 
         # if fm_order == "CL":
@@ -100,19 +100,19 @@ def _conv1d_unroll(
     nil = in_shape[0]
     w_unrolled = np.zeros((cin * nil, cout * ol), dtype=kernel.dtype)
     for i in range(cin):
-        w_unrolled[i * nil: i * nil + nil, :] = w_unrolled_np[
-                                                i * il + padding[0]: i * il + il - padding[0], :
-                                                ]
+        w_unrolled[i * nil : i * nil + nil, :] = w_unrolled_np[
+            i * il + padding[0] : i * il + il - padding[0], :
+        ]
 
     return w_unrolled
 
 
 def _conv2d_unroll(
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel: WeightType,
-        stride: Size2Type,
-        padding: Size2Type,
+    in_shape: Size2Type,
+    out_shape: Size2Type,
+    kernel: WeightType,
+    stride: Size2Type,
+    padding: Size2Type,
 ) -> WeightType:
     """Unroll the kernel of 2d convolution into a matrix."""
     cout, cin, kh, kw = kernel.shape
@@ -133,21 +133,21 @@ def _conv2d_unroll(
             for ch_idx in np.ndindex(kernel.shape[:2]):
                 # [0] -> o_ch, [1] -> i_ch
                 zeros_image[
-                i * stride[0]
-                + ch_idx[1] * ih: i * stride[0]
-                                  + ch_idx[1] * ih
-                                  + kh,
-                j * stride[1]
-                + ch_idx[0] * iw: j * stride[1]
-                                  + ch_idx[0] * iw
-                                  + kw,
-                i * ow + j,
+                    i * stride[0]
+                    + ch_idx[1] * ih : i * stride[0]
+                    + ch_idx[1] * ih
+                    + kh,
+                    j * stride[1]
+                    + ch_idx[0] * iw : j * stride[1]
+                    + ch_idx[0] * iw
+                    + kw,
+                    i * ow + j,
                 ] = kernel[ch_idx[0], ch_idx[1], :, :]
 
             t = (
                 zeros_image[:, :, i * ow + j]
-                    .reshape(cin * ih, cout, iw)
-                    .transpose(1, 0, 2)
+                .reshape(cin * ih, cout, iw)
+                .transpose(1, 0, 2)
             )
             # else:
             #     # (cin*ih, cout, iw) -> (cout, cin, ih, iw)
@@ -168,15 +168,15 @@ def _conv2d_unroll(
 
     for i in range(cin):
         for j in range(nih):
-            w_unrolled[i * nin_size + j * niw: i * nin_size + j * niw + niw, :] = (
+            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
                 w_unrolled_np[
-                i * in_size
-                + (padding[0] + j) * iw
-                + padding[1]: i * in_size
-                              + (padding[0] + j) * iw
-                              + padding[1]
-                              + niw,
-                :,
+                    i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1] : i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1]
+                    + niw,
+                    :,
                 ]
             )
 
@@ -184,15 +184,15 @@ def _conv2d_unroll(
 
 
 def _conv2d_halfroll(
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel: WeightType,
-        stride: Size2Type,
-        padding: Size2Type,
+    in_shape: Size2Type,
+    out_shape: Size2Type,
+    kernel: WeightType,
+    stride: Size2Type,
+    padding: Size2Type,
 ) -> WeightType:
     cout, cin, kh = kernel.shape
     ih = in_shape[1] + 2 * padding[0]
-    #ih = in_shape[1]
+    # ih = in_shape[1]
     o_ch, oh = out_shape
     w_np = np.zeros((cin * ih, cout * oh), dtype=kernel.dtype)
     for i in range(cout):
@@ -201,22 +201,30 @@ def _conv2d_halfroll(
                 for k in range(oh):
                     # w_np[j*ih+padding[0]*(stride[1]-1)+k*stride[1]:j*ih+padding[1]*(stride[1]-1)+k*stride[1]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
                     # w_np[j*ih+stride[1]*(padding[0]+k)-padding[0]:j*ih+stride[1]*(padding[0]+k)-padding[0]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
-                    w_np[j * ih + k * stride[1]:j * ih + k * stride[1] + kh, i * oh + k] = kernel[i, j, :]
+                    w_np[
+                        j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
+                    ] = kernel[i, j, :]
             else:
                 for k in range(oh):
-                    w_np[j * ih + k * stride[1]:j * ih + k * stride[1] + kh, i * oh + k] = kernel[i, j, :]
-            w_np= np.delete(w_np, np.concatenate((np.arange(padding[0]), np.arange(ih-padding[0], ih))), axis=0)
+                    w_np[
+                        j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
+                    ] = kernel[i, j, :]
+            w_np = np.delete(
+                w_np,
+                np.concatenate((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
+                axis=0,
+            )
     return w_np
 
 
 def _pool2d_kernel_unroll(
-        channels: int,
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        ksize: Size2Type,
-        stride: Size2Type,
-        padding: Size2Type,
-        # fm_order: str,
+    channels: int,
+    in_shape: Size2Type,
+    out_shape: Size2Type,
+    ksize: Size2Type,
+    stride: Size2Type,
+    padding: Size2Type,
+    # fm_order: str,
 ) -> WeightType:
     kh, kw = ksize
     ih = in_shape[0] + 2 * padding[0]
@@ -232,8 +240,8 @@ def _pool2d_kernel_unroll(
             zeros_image = np.zeros((channels * ih, iw * channels), dtype=np.bool_)
             for i_ch in range(channels):
                 zeros_image[
-                (i * stride[0] + i_ch * ih): (i * stride[0] + i_ch * ih) + kh,
-                (j * stride[1] + i_ch * iw): (j * stride[1] + i_ch * iw) + kw,
+                    (i * stride[0] + i_ch * ih) : (i * stride[0] + i_ch * ih) + kh,
+                    (j * stride[1] + i_ch * iw) : (j * stride[1] + i_ch * iw) + kw,
                 ] = 1
 
             temp = zeros_image.reshape((channels * ih, channels, iw)).transpose(1, 0, 2)
@@ -247,15 +255,15 @@ def _pool2d_kernel_unroll(
 
     for i in range(channels):
         for j in range(nih):
-            w_unrolled[i * nin_size + j * niw: i * nin_size + j * niw + niw, :] = (
+            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
                 w_unrolled_np[
-                i * in_size
-                + (padding[0] + j) * iw
-                + padding[1]: i * in_size
-                              + (padding[0] + j) * iw
-                              + padding[1]
-                              + niw,
-                :,
+                    i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1] : i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1]
+                    + niw,
+                    :,
                 ]
             )
 
@@ -263,13 +271,13 @@ def _pool2d_kernel_unroll(
 
 
 def _func_pool2d(
-        x_chw: NeuOutType,
-        out_shape: Size2Type,
-        ksize: Size2Type,
-        stride: Size2Type,
-        padding: Size2Type,
-        type: str,
-        threshold: int,
+    x_chw: NeuOutType,
+    out_shape: Size2Type,
+    ksize: Size2Type,
+    stride: Size2Type,
+    padding: Size2Type,
+    type: str,
+    threshold: int,
 ) -> NeuOutType:
     xcin, xh, xw = x_chw.shape
     kh, kw = ksize
@@ -292,17 +300,17 @@ def _func_pool2d(
                 if type == "avg":
                     out[c, i, j] = np.sum(
                         x_padded[
-                        c,
-                        stride[0] * i: stride[0] * i + kh,
-                        stride[1] * j: stride[1] * j + kw,
+                            c,
+                            stride[0] * i : stride[0] * i + kh,
+                            stride[1] * j : stride[1] * j + kw,
                         ]
                     )
                 else:
                     out[c, i, j] = np.max(
                         x_padded[
-                        c,
-                        stride[0] * i: stride[0] * i + kh,
-                        stride[1] * j: stride[1] * j + kw,
+                            c,
+                            stride[0] * i : stride[0] * i + kh,
+                            stride[1] * j : stride[1] * j + kw,
                         ]
                     )
 
@@ -315,11 +323,11 @@ def _func_pool2d(
 
 
 def _conv1d_faster(
-        x_cl: NeuOutType,
-        out_shape: Size1Type,
-        kernel: WeightType,
-        stride: Size1Type,
-        padding: Size1Type,
+    x_cl: NeuOutType,
+    out_shape: Size1Type,
+    kernel: WeightType,
+    stride: Size1Type,
+    padding: Size1Type,
 ) -> SynOutType:
     """Faster 1d convolution.
 
@@ -346,12 +354,12 @@ def _conv1d_faster(
 
 
 def _conv2d_faster(
-        x_chw: NeuOutType,
-        out_shape: Size2Type,
-        kernel: WeightType,
-        stride: Size2Type,
-        padding: Size2Type,
-        # fm_order: str,
+    x_chw: NeuOutType,
+    out_shape: Size2Type,
+    kernel: WeightType,
+    stride: Size2Type,
+    padding: Size2Type,
+    # fm_order: str,
 ) -> SynOutType:
     """Faster 2d convolution.
 
@@ -382,12 +390,12 @@ def _conv2d_faster(
 
 
 def _convtranspose1d_unroll(
-        in_shape: Size1Type,
-        out_shape: Size1Type,
-        kernel: WeightType,
-        stride: Size1Type,
-        padding: Size1Type,
-        output_padding: Size1Type,
+    in_shape: Size1Type,
+    out_shape: Size1Type,
+    kernel: WeightType,
+    stride: Size1Type,
+    padding: Size1Type,
+    output_padding: Size1Type,
 ) -> WeightType:
     """Unroll the kernel of 1d transposed convolution into a matrix.
 
@@ -408,12 +416,12 @@ def _convtranspose1d_unroll(
         for ch_idx in np.ndindex(kernel_flip.shape[:2]):
             # [0] -> o_ch, [1] -> i_ch
             zeros_image[
-            i * stride_transpose
-            + ch_idx[1] * il: i * stride_transpose
-                              + ch_idx[1] * il
-                              + kl,
-            ch_idx[0],
-            i,
+                i * stride_transpose
+                + ch_idx[1] * il : i * stride_transpose
+                + ch_idx[1] * il
+                + kl,
+                ch_idx[0],
+                i,
             ] = kernel_flip[ch_idx[0], ch_idx[1], :]
 
         t = zeros_image[:, :, i].T
@@ -425,9 +433,9 @@ def _convtranspose1d_unroll(
     nil = in_shape[0] + (in_shape[0] - 1) * (stride[0] - 1)
     w_unrolled_nk = np.zeros((cin * nil, cout * ol), dtype=kernel.dtype)
     for i in range(cin):
-        w_unrolled_nk[i * nil: i * nil + nil, :] = w_unrolled_np[
-                                                   i * il + kl - 1: i * il + kl - 1 + nil, :
-                                                   ]
+        w_unrolled_nk[i * nil : i * nil + nil, :] = w_unrolled_np[
+            i * il + kl - 1 : i * il + kl - 1 + nil, :
+        ]
 
     # stripe
     w_reshaped = w_unrolled_nk.reshape((cin, nil, cout, ol))
@@ -438,7 +446,7 @@ def _convtranspose1d_unroll(
     # padding
     # w_unrolled : (cin, in_shape[0], cout, ol - output_padding[0])
     w_unrolled = (
-        w_unrolled_ns[:, :, :, padding[0]: (-1 * padding[0])]
+        w_unrolled_ns[:, :, :, padding[0] : (-1 * padding[0])]
         if padding[0] > 0
         else w_unrolled_ns
     )
@@ -453,12 +461,12 @@ def _convtranspose1d_unroll(
 
 
 def _convtranspose2d_unroll(
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel: WeightType,
-        stride: Size2Type,
-        padding: Size2Type,
-        output_padding: Size2Type,
+    in_shape: Size2Type,
+    out_shape: Size2Type,
+    kernel: WeightType,
+    stride: Size2Type,
+    padding: Size2Type,
+    output_padding: Size2Type,
 ) -> WeightType:
     """Unroll the kernel of 2d transposed convolution into a matrix."""
     kernel_flip = np.flip(kernel, axis=(2, 3))
@@ -482,21 +490,21 @@ def _convtranspose2d_unroll(
             for ch_idx in np.ndindex(kernel_flip.shape[:2]):
                 # [0] -> o_ch, [1] -> i_ch
                 zeros_image[
-                i * stride_transpose[0]
-                + ch_idx[1] * ih: i * stride_transpose[0]
-                                  + ch_idx[1] * ih
-                                  + kh,
-                j * stride_transpose[1]
-                + ch_idx[0] * iw: j * stride_transpose[1]
-                                  + ch_idx[0] * iw
-                                  + kw,
-                i * ow + j,
+                    i * stride_transpose[0]
+                    + ch_idx[1] * ih : i * stride_transpose[0]
+                    + ch_idx[1] * ih
+                    + kh,
+                    j * stride_transpose[1]
+                    + ch_idx[0] * iw : j * stride_transpose[1]
+                    + ch_idx[0] * iw
+                    + kw,
+                    i * ow + j,
                 ] = kernel_flip[ch_idx[0], ch_idx[1], :, :]
 
             t = (
                 zeros_image[:, :, i * ow + j]
-                    .reshape(cin * ih, cout, iw)
-                    .transpose(1, 0, 2)
+                .reshape(cin * ih, cout, iw)
+                .transpose(1, 0, 2)
             )
             for o_ch in range(cout):
                 w_unrolled_np[:, i * ow + j + o_ch * out_size] = t[o_ch].ravel()
@@ -549,12 +557,12 @@ def _convtranspose2d_unroll(
 
 
 def _convtranspose1d_faster(
-        x_cl: NeuOutType,
-        out_shape: Size1Type,
-        kernel: WeightType,
-        stride: Size1Type,
-        padding: Size1Type,
-        output_padding: Size1Type,
+    x_cl: NeuOutType,
+    out_shape: Size1Type,
+    kernel: WeightType,
+    stride: Size1Type,
+    padding: Size1Type,
+    output_padding: Size1Type,
 ) -> SynOutType:
     # (C, L)
     xc, xl = x_cl.shape
@@ -593,7 +601,7 @@ def _convtranspose1d_faster(
     out = out.T
 
     # inverse padding : (cout, (xl-1)*stride+kernel) -> (cout, (xl-1)*stride+kernel-2*padding)
-    out = out[:, padding[0]: (-1 * padding[0])] if padding[0] > 0 else out
+    out = out[:, padding[0] : (-1 * padding[0])] if padding[0] > 0 else out
 
     # output_padding
     out = np.pad(out, ((0, 0), (0, output_padding[0])), mode="constant")
@@ -602,12 +610,12 @@ def _convtranspose1d_faster(
 
 
 def _convtranspose2d_faster(
-        x_chw: NeuOutType,
-        out_shape: Size2Type,
-        kernel: WeightType,
-        stride: Size2Type,
-        padding: Size2Type,
-        output_padding: Size2Type,
+    x_chw: NeuOutType,
+    out_shape: Size2Type,
+    kernel: WeightType,
+    stride: Size2Type,
+    padding: Size2Type,
+    output_padding: Size2Type,
 ) -> SynOutType:
     # (C, H, W)
     xc, xh, xw = x_chw.shape
@@ -653,10 +661,10 @@ def _convtranspose2d_faster(
     # padding & output_padding
     # inverse padding
     out = out[
-          :,
-          padding[0]: (-1 * padding[0]) if padding[0] > 0 else None,
-          padding[1]: (-1 * padding[1]) if padding[1] > 0 else None,
-          ]
+        :,
+        padding[0] : (-1 * padding[0]) if padding[0] > 0 else None,
+        padding[1] : (-1 * padding[1]) if padding[1] > 0 else None,
+    ]
     # output_padding
     out = np.pad(
         out, ((0, 0), (0, output_padding[0]), (0, output_padding[1])), mode="constant"
@@ -666,7 +674,7 @@ def _convtranspose2d_faster(
 
 
 def _1d_im2col(
-        x_padded: NeuOutType, ol: int, kl: int, stride: Size1Type
+    x_padded: NeuOutType, ol: int, kl: int, stride: Size1Type
 ) -> NDArray[np.int64]:
     cols = np.zeros((ol, x_padded.shape[0] * kl), dtype=np.int64)
 
@@ -674,14 +682,14 @@ def _1d_im2col(
 
     idx = 0
     for i in range(0, pl - kl + 1, stride[0]):
-        cols[idx] = x_padded[:, i: i + kl].ravel()
+        cols[idx] = x_padded[:, i : i + kl].ravel()
         idx += 1
 
     return cols
 
 
 def _2d_im2col(
-        x_padded: NeuOutType, oh: int, ow: int, kh: int, kw: int, stride: Size2Type
+    x_padded: NeuOutType, oh: int, ow: int, kh: int, kw: int, stride: Size2Type
 ) -> NDArray[np.int64]:
     cols = np.zeros((oh * ow, x_padded.shape[0] * kh * kw), dtype=np.int64)
 
@@ -690,7 +698,7 @@ def _2d_im2col(
     idx = 0
     for i in range(0, ph - kh + 1, stride[0]):
         for j in range(0, pw - kw + 1, stride[1]):
-            cols[idx] = x_padded[:, i: i + kh, j: j + kw].ravel()
+            cols[idx] = x_padded[:, i : i + kh, j : j + kw].ravel()
             idx += 1
 
     return cols
diff --git a/paibox/components/synapses/synapses.py b/paibox/components/synapses/synapses.py
index c7a644ea..8c52d7eb 100644
--- a/paibox/components/synapses/synapses.py
+++ b/paibox/components/synapses/synapses.py
@@ -95,15 +95,15 @@ def __init__(
 
 class Conv1d(Conv1dSyn):
     def __init__(
-            self,
-            source: Union[Neuron, InputProj],
-            dest: Neuron,
-            kernel: np.ndarray,
-            *,
-            stride: _Size1Type = 1,
-            padding: _Size1Type = 0,
-            kernel_order: _KOrder3d = "OIL",
-            name: Optional[str] = None,
+        self,
+        source: Union[Neuron, InputProj],
+        dest: Neuron,
+        kernel: np.ndarray,
+        *,
+        stride: _Size1Type = 1,
+        padding: _Size1Type = 0,
+        kernel_order: _KOrder3d = "OIL",
+        name: Optional[str] = None,
     ) -> None:
         """1d convolution synapses in fully-unrolled format.
 
@@ -137,15 +137,15 @@ def __init__(
 
 class Conv2d(Conv2dSyn):
     def __init__(
-            self,
-            source: Union[Neuron, InputProj],
-            dest: Neuron,
-            kernel: np.ndarray,
-            *,
-            stride: _Size2Type = 1,
-            padding: _Size2Type = 0,
-            kernel_order: _KOrder4d = "OIHW",
-            name: Optional[str] = None,
+        self,
+        source: Union[Neuron, InputProj],
+        dest: Neuron,
+        kernel: np.ndarray,
+        *,
+        stride: _Size2Type = 1,
+        padding: _Size2Type = 0,
+        kernel_order: _KOrder4d = "OIHW",
+        name: Optional[str] = None,
     ) -> None:
         """2d convolution synapses in fully-unrolled format.
 
@@ -179,16 +179,16 @@ def __init__(
 
 class ConvTranspose1d(ConvTranspose1dSyn):
     def __init__(
-            self,
-            source: Union[Neuron, InputProj],
-            dest: Neuron,
-            kernel: np.ndarray,
-            *,
-            stride: _Size1Type = 1,
-            padding: _Size1Type = 0,
-            output_padding: _Size1Type = 0,
-            kernel_order: _KOrder3d = "OIL",
-            name: Optional[str] = None,
+        self,
+        source: Union[Neuron, InputProj],
+        dest: Neuron,
+        kernel: np.ndarray,
+        *,
+        stride: _Size1Type = 1,
+        padding: _Size1Type = 0,
+        output_padding: _Size1Type = 0,
+        kernel_order: _KOrder3d = "OIL",
+        name: Optional[str] = None,
     ) -> None:
         """1d transposed convolution synapses in fully-unrolled format.
 
@@ -226,16 +226,16 @@ def __init__(
 
 class ConvTranspose2d(ConvTranspose2dSyn):
     def __init__(
-            self,
-            source: Union[Neuron, InputProj],
-            dest: Neuron,
-            kernel: np.ndarray,
-            *,
-            stride: _Size2Type = 1,
-            padding: _Size2Type = 0,
-            output_padding: _Size2Type = 0,
-            kernel_order: _KOrder4d = "OIHW",
-            name: Optional[str] = None,
+        self,
+        source: Union[Neuron, InputProj],
+        dest: Neuron,
+        kernel: np.ndarray,
+        *,
+        stride: _Size2Type = 1,
+        padding: _Size2Type = 0,
+        output_padding: _Size2Type = 0,
+        kernel_order: _KOrder4d = "OIHW",
+        name: Optional[str] = None,
     ) -> None:
         """2d transposed convolution synapses in fully-unrolled format.
 
@@ -272,4 +272,3 @@ def __init__(
             kernel_order,
             name,
         )
-
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 323fcdae..d5aa57cc 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -22,8 +22,8 @@
     _conv1d_faster,
     _conv1d_unroll,
     _conv2d_faster,
-    _conv2d_unroll,
     _conv2d_halfroll,
+    _conv2d_unroll,
     _convtranspose1d_faster,
     _convtranspose1d_unroll,
     _convtranspose2d_faster,
@@ -68,7 +68,6 @@ class ConnType(Enum):
     """All-to-all connection."""
 
 
-
 def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
     """Convert raw weights to `np.ndarray` coarsely (without optimization).
 
@@ -397,6 +396,7 @@ def connectivity(self):
             self.in_shape, self.out_shape, self.weights, self.stride, self.padding
         )
 
+
 class Conv2dHalfForward(Transform):
     def __init__(
         self,
@@ -419,7 +419,6 @@ def __init__(
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         return x @ self.connectivity
 
-
     @property
     def connectivity(self):
         return _conv2d_halfroll(
diff --git a/paibox/mixin.py b/paibox/mixin.py
index d8fdd620..ea056c71 100644
--- a/paibox/mixin.py
+++ b/paibox/mixin.py
@@ -165,7 +165,6 @@ def max_inputs(self, *args, **kwargs) -> VoltageType:
         return np.asarray(output, dtype=VOLTAGE_DTYPE)
 
 
-
 class TimeRelatedNode(MixIn):
     """Add time-related properties for `NeuDyn` & `InputProj`."""
 
diff --git a/paibox/network.py b/paibox/network.py
index bb26d994..04f41c12 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -74,7 +74,12 @@ def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
         try:
-            from .components.functional import Conv2dSemiMap, Delay_FullConn, MaxPool2dSemiMap, AvgPool2dSemiMap
+            from .components.functional import (
+                AvgPool2dSemiMap,
+                Conv2dSemiMap,
+                Delay_FullConn,
+                MaxPool2dSemiMap,
+            )
         except ImportError:
             Conv2dSemiMap, Delay_FullConn = None
         generated = dict()
@@ -84,17 +89,17 @@ def build_fmodule(
             if Conv2dSemiMap is not None and isinstance(module, Conv2dSemiMap):
                 generated[module] = module.build(network, delay, **build_options)
                 if module.stride[1] != 1:
-                    delay = delay*module.stride[1]
+                    delay = delay * module.stride[1]
             elif Delay_FullConn is not None and isinstance(module, Delay_FullConn):
                 generated[module] = module.build(network, delay, **build_options)
             elif MaxPool2dSemiMap is not None and isinstance(module, MaxPool2dSemiMap):
                 generated[module] = module.build(network, delay, **build_options)
                 if module.stride[1] != 1:
-                    delay = delay*module.stride[1]
+                    delay = delay * module.stride[1]
             elif AvgPool2dSemiMap is not None and isinstance(module, AvgPool2dSemiMap):
                 generated[module] = module.build(network, delay, **build_options)
                 if module.stride[1] != 1:
-                    delay = delay*module.stride[1]
+                    delay = delay * module.stride[1]
             else:
                 generated[module] = module.build(network, **build_options)
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 0aa9f532..167188bb 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -8,11 +8,11 @@
 from paibox.network import DynSysGroup
 from paibox.simulator.utils import _conv2d_faster_fp32
 from paibox.utils import as_shape, shape2num, typical_round
-from tests.components.utils import max_pooling, avg_pooling
+from tests.components.utils import avg_pooling, max_pooling
 
 
 def _assert_build_fmodule(
-        network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
+    network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
 ):
     nodes = network.nodes().subset(DynamicSys).unique()
     assert len(nodes) == n_node_bef_build
@@ -413,16 +413,16 @@ def test_SpikingSub_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingPool2d(
-            self,
-            shape,
-            channels,
-            ksize,
-            stride,
-            padding,
-            threshold,
-            fm_order,
-            pool_type,
-            p_binomial,
+        self,
+        shape,
+        channels,
+        ksize,
+        stride,
+        padding,
+        threshold,
+        fm_order,
+        pool_type,
+        p_binomial,
     ):
         from tests.shared_networks import SpikingPool2d_Net
 
@@ -507,14 +507,14 @@ def test_SpikingPool2d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingAvgPool2dWithV(
-            self,
-            shape,
-            channels,
-            ksize,
-            stride,
-            padding,
-            threshold,
-            p_binomial,
+        self,
+        shape,
+        channels,
+        ksize,
+        stride,
+        padding,
+        threshold,
+        p_binomial,
     ):
         """NOTE: This function is a native implementation of SNNs and is therefore not  \
             compared to the ANN implementation."""
@@ -651,18 +651,35 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
     @pytest.mark.parametrize(
         "shape, kernel, stride, padding",
         [
-            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-             [1, 1], [0, 0]),
-            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-             [1, 2], [0, 0]),
-            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-             [2, 1], [0, 0]),
-            ((3, 11), np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-             [2, 2], [0, 0]),
+            (
+                (3, 11),
+                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+                [1, 1],
+                [0, 0],
+            ),
+            (
+                (3, 11),
+                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+                [1, 2],
+                [0, 0],
+            ),
+            (
+                (3, 11),
+                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+                [2, 1],
+                [0, 0],
+            ),
+            (
+                (3, 11),
+                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
+                [2, 2],
+                [0, 0],
+            ),
         ],
     )
     def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
         from tests.shared_networks import Conv2dSemiMap_Net1
+
         net1 = Conv2dSemiMap_Net1(shape, kernel, stride, padding)
         conv = net1.conv1
         generated = DynSysGroup.build_fmodule(net1)
@@ -674,7 +691,9 @@ def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
         for i in range(15):
             pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
             sim1.run(1)
-        expected = _conv2d_faster_fp32(np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0]))
+        expected = _conv2d_faster_fp32(
+            np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0])
+        )
         expected = np.array(expected, dtype=np.int32)
         if (expected >> 8).all() > 0:
             expected = np.full_like(expected, ((1 << 8) - 1))
@@ -686,17 +705,43 @@ def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
     @pytest.mark.parametrize(
         "shape, kernel, stride, padding, out_feature, weight",
         [
-            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [1, 1], [0, 0], 10, np.random.randint(-5, 5, size=(7 * 7, 10), dtype=np.int8)),
-            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [1, 2], [0, 0], 10, np.random.randint(-5, 5, size=(4 * 4, 10), dtype=np.int8)),
-            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [2, 1], [0, 0], 10, np.random.randint(-5, 5, size=(3 * 3, 10), dtype=np.int8)),
-            ((1, 11), np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-             [2, 2], [0, 0], 10, np.random.randint(-5, 5, size=(2 * 2, 10), dtype=np.int8)),
+            (
+                (1, 11),
+                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+                [1, 1],
+                [0, 0],
+                10,
+                np.random.randint(-5, 5, size=(7 * 7, 10), dtype=np.int8),
+            ),
+            (
+                (1, 11),
+                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+                [1, 2],
+                [0, 0],
+                10,
+                np.random.randint(-5, 5, size=(4 * 4, 10), dtype=np.int8),
+            ),
+            (
+                (1, 11),
+                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+                [2, 1],
+                [0, 0],
+                10,
+                np.random.randint(-5, 5, size=(3 * 3, 10), dtype=np.int8),
+            ),
+            (
+                (1, 11),
+                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
+                [2, 2],
+                [0, 0],
+                10,
+                np.random.randint(-5, 5, size=(2 * 2, 10), dtype=np.int8),
+            ),
         ],
     )
-    def test_Conv2dSemiMap_Net(self, shape, kernel, stride, padding, out_feature, weight):
+    def test_Conv2dSemiMap_Net(
+        self, shape, kernel, stride, padding, out_feature, weight
+    ):
         from tests.shared_networks import Conv2dSemiMap_Net2
 
         net2 = Conv2dSemiMap_Net2(shape, kernel, stride, padding, out_feature, weight)
@@ -715,10 +760,14 @@ def test_Conv2dSemiMap_Net(self, shape, kernel, stride, padding, out_feature, we
         for i in range(17):
             pb.FRONTEND_ENV.save(data1=inpb[0][i])
             sim2.run(1)
-        expected = _conv2d_faster_fp32(np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0]))
+        expected = _conv2d_faster_fp32(
+            np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0])
+        )
         expected[expected < 0] = 0
 
-        expected = _conv2d_faster_fp32(expected, kernel, _pair(stride[1]), _pair(padding[1]))
+        expected = _conv2d_faster_fp32(
+            expected, kernel, _pair(stride[1]), _pair(padding[1])
+        )
         expected[expected < 0] = 0
 
         expected = np.array(expected, dtype=np.int32)
@@ -734,14 +783,39 @@ def test_Conv2dSemiMap_Net(self, shape, kernel, stride, padding, out_feature, we
     @pytest.mark.parametrize(
         "shape, kernel_size, stride, weight, pool_type",
         [
-            ((1, 8), (2, 2), [1, 1], np.random.randint(-5, 5, size=(6 * 6, 2), dtype=np.int8), "avg"),
-            ((1, 8), (2, 2), [2, 2], np.random.randint(-5, 5, size=(2 * 2, 2), dtype=np.int8), "avg"),
-            ((1, 8), (2, 2), [1, 1], np.random.randint(0, 5, size=(6 * 6, 2), dtype=np.int8), "max"),
-            ((1, 8), (2, 2), [2, 2], np.random.randint(0, 5, size=(2 * 2, 2), dtype=np.int8), "max"),
+            (
+                (1, 8),
+                (2, 2),
+                [1, 1],
+                np.random.randint(-5, 5, size=(6 * 6, 2), dtype=np.int8),
+                "avg",
+            ),
+            (
+                (1, 8),
+                (2, 2),
+                [2, 2],
+                np.random.randint(-5, 5, size=(2 * 2, 2), dtype=np.int8),
+                "avg",
+            ),
+            (
+                (1, 8),
+                (2, 2),
+                [1, 1],
+                np.random.randint(0, 5, size=(6 * 6, 2), dtype=np.int8),
+                "max",
+            ),
+            (
+                (1, 8),
+                (2, 2),
+                [2, 2],
+                np.random.randint(0, 5, size=(2 * 2, 2), dtype=np.int8),
+                "max",
+            ),
         ],
     )
     def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
         from tests.shared_networks import Pool2dSemiMap_Net
+
         net1 = Pool2dSemiMap_Net(shape, kernel_size, stride, weight, pool_type)
         pool = net1.pool2
         linear = net1.linear1
@@ -777,8 +851,8 @@ def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
             else:
                 expected = expected & ((1 << 8) - 1)
             assert np.array_equal(expected, sim1.data[probe_linear][12])
-        #print(sim1.data[probe_pool])
-        #print(sim1.data[probe_linear])
+        # print(sim1.data[probe_pool])
+        # print(sim1.data[probe_linear])
 
     @pytest.mark.parametrize(
         "shape, weight1",
@@ -788,6 +862,7 @@ def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
     )
     def test_Linear(self, shape, weight1):
         from tests.shared_networks import Linear_Net
+
         net1 = Linear_Net(shape, weight1)
         net2 = Linear_Net(shape, weight1)
         linear = net2.linear1
diff --git a/tests/components/utils.py b/tests/components/utils.py
index 9c40b871..4feabbcf 100644
--- a/tests/components/utils.py
+++ b/tests/components/utils.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from paibox.types import SpikeType, NeuOutType
+from paibox.types import NeuOutType, SpikeType
 
 
 def maxpool2d_golden(
@@ -85,7 +85,11 @@ def avgpool2d_golden(
     return out >= threshold
 
 
-def max_pooling(input_data, kernel_size: tuple[int, int], stride:tuple[int, int],) -> NeuOutType:
+def max_pooling(
+    input_data,
+    kernel_size: tuple[int, int],
+    stride: tuple[int, int],
+) -> NeuOutType:
     """
     实现最大池化层
 
@@ -115,7 +119,11 @@ def max_pooling(input_data, kernel_size: tuple[int, int], stride:tuple[int, int]
     return output_data
 
 
-def avg_pooling(input_data, kernel_size: tuple[int, int], stride:tuple[int, int],) -> NeuOutType:
+def avg_pooling(
+    input_data,
+    kernel_size: tuple[int, int],
+    stride: tuple[int, int],
+) -> NeuOutType:
     """
     实现平均池化层
 
@@ -141,6 +149,8 @@ def avg_pooling(input_data, kernel_size: tuple[int, int], stride:tuple[int, int]
                 y1 = j * stride[1]
                 x2 = x1 + kernel_size[0]
                 y2 = y1 + kernel_size[1]
-                output_data[c, i, j] = np.sum(input_data[c, x1:x2, y1:y2]) >> ((kernel_height * kernel_width).bit_length()-1)
+                output_data[c, i, j] = np.sum(input_data[c, x1:x2, y1:y2]) >> (
+                    (kernel_height * kernel_width).bit_length() - 1
+                )
 
     return output_data
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 36291107..77610ec2 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -216,13 +216,15 @@ def __init__(self, shape, axes):
         self.probe1 = pb.Probe(self.t3d, "spike")
         self.probe2 = pb.Probe(self.n2, "spike")
 
+
 class Conv2dSemiMap_Net1(pb.DynSysGroup):
     def __init__(self, shape, kernel, stride, padding):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiMap(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
-
+        self.conv1 = pb.Conv2dSemiMap(
+            self.i1, kernel, stride[0], padding[0], tick_wait_start=1
+        )
 
 
 class Conv2dSemiMap_Net2(pb.DynSysGroup):
@@ -230,43 +232,60 @@ def __init__(self, shape, kernel, stride, padding, out_feature, weight):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiMap(self.i1, kernel, stride[0], padding[0], tick_wait_start=1)
-        self.conv2 = pb.Conv2dSemiMap(self.conv1, kernel, stride[1], padding[1], tick_wait_start=3)
+        self.conv1 = pb.Conv2dSemiMap(
+            self.i1, kernel, stride[0], padding[0], tick_wait_start=1
+        )
+        self.conv2 = pb.Conv2dSemiMap(
+            self.conv1, kernel, stride[1], padding[1], tick_wait_start=3
+        )
         self.linear1 = pb.DelayFullConn(
             self.conv2,
             out_feature,
             weights=weight,
             bias=0,
             conn_type=pb.SynConnType.All2All,
-            tick_wait_start=5
+            tick_wait_start=5,
         )
 
+
 class Pool2dSemiMap_Net(pb.DynSysGroup):
     def __init__(self, shape, kernel_size, stride, weight, pool_type):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
         if pool_type == "avg":
-            self.pool1 = pb.AvgPool2dSemiMap(self.i1, kernel_size, stride[0], tick_wait_start=1)
-            self.pool2 = pb.AvgPool2dSemiMap(self.pool1, kernel_size, stride[1], tick_wait_start=3)
+            self.pool1 = pb.AvgPool2dSemiMap(
+                self.i1, kernel_size, stride[0], tick_wait_start=1
+            )
+            self.pool2 = pb.AvgPool2dSemiMap(
+                self.pool1, kernel_size, stride[1], tick_wait_start=3
+            )
         else:
-            self.pool1 = pb.MaxPool2dSemiMap(self.i1, kernel_size, stride[0], tick_wait_start=1)
-            self.pool2 = pb.MaxPool2dSemiMap(self.pool1, kernel_size, stride[1], tick_wait_start=3)
+            self.pool1 = pb.MaxPool2dSemiMap(
+                self.i1, kernel_size, stride[0], tick_wait_start=1
+            )
+            self.pool2 = pb.MaxPool2dSemiMap(
+                self.pool1, kernel_size, stride[1], tick_wait_start=3
+            )
         self.linear1 = pb.DelayFullConn(
             self.pool2,
             2,
             weights=weight,
             bias=0,
             conn_type=pb.SynConnType.All2All,
-            tick_wait_start=5
+            tick_wait_start=5,
         )
 
+
 class Linear_Net(pb.DynSysGroup):
     def __init__(self, shape, weight1):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.linear1 = pb.Linear(self.i1, 10, weights=weight1, bias=2, conn_type=pb.SynConnType.All2All)
+        self.linear1 = pb.Linear(
+            self.i1, 10, weights=weight1, bias=2, conn_type=pb.SynConnType.All2All
+        )
         self.probe1 = pb.Probe(self.linear1, "spike")
 
+
 class ANNNetwork(pb.Network):
     def __init__(self):
         super().__init__()

From 816039a9833db75ce4e6379c540eba17c319c0bd Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 23 Jul 2024 16:32:32 +0800
Subject: [PATCH 034/187] =?UTF-8?q?=F0=9F=94=A5=20deprecate=20`NoDecay`=20?=
 =?UTF-8?q?synapse?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py                     |  1 -
 paibox/components/synapses/synapses.py | 24 ------------------------
 2 files changed, 25 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index ba9fd9a1..0540f9f5 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -33,7 +33,6 @@
 from .components.synapses.synapses import ConvTranspose2d as ConvTranspose2d
 from .components.synapses.synapses import FullConn as FullConn
 from .components.synapses.synapses import MatMul2d as MatMul2d
-from .components.synapses.synapses import NoDecay as NoDecay
 from .context import FRONTEND_ENV as FRONTEND_ENV
 from .network import DynSysGroup as DynSysGroup
 from .network import Network as Network
diff --git a/paibox/components/synapses/synapses.py b/paibox/components/synapses/synapses.py
index 8c52d7eb..ec301485 100644
--- a/paibox/components/synapses/synapses.py
+++ b/paibox/components/synapses/synapses.py
@@ -1,10 +1,8 @@
-import sys
 from typing import Optional, Union
 
 import numpy as np
 
 from paibox.base import NeuDyn
-from paibox.exceptions import PAIBoxDeprecationWarning
 from paibox.types import DataArrayType
 
 from ..neuron import Neuron
@@ -20,11 +18,6 @@
 from .conv_utils import _pair, _single
 from .transforms import ConnType
 
-if sys.version_info >= (3, 13):
-    from warnings import deprecated
-else:
-    from typing_extensions import deprecated
-
 __all__ = [
     "FullConn",
     "MatMul2d",
@@ -57,23 +50,6 @@ def __init__(
         super().__init__(source, dest, weights, conn_type, name=name)
 
 
-@deprecated(
-    "'NoDecay' will be removed in a future version. Use 'FullConn' instead.",
-    category=PAIBoxDeprecationWarning,
-)
-class NoDecay(FullConnSyn):
-    def __init__(
-        self,
-        source: Union[NeuDyn, InputProj],
-        dest: NeuDyn,
-        weights: DataArrayType = 1,
-        *,
-        conn_type: ConnType = ConnType.All2All,
-        name: Optional[str] = None,
-    ) -> None:
-        super().__init__(source, dest, weights, conn_type, name=name)
-
-
 class MatMul2d(FullConnSyn):
     def __init__(
         self,

From 923e82e8fc24c62015eb8dae95f6bed323ea5d6c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:06:49 +0000
Subject: [PATCH 035/187] :arrow_up: auto update by pre-commit hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 24.4.2 → 24.8.0](https://github.com/psf/black/compare/24.4.2...24.8.0)
- [github.com/python-poetry/poetry: 1.8.3 → 1.8.0](https://github.com/python-poetry/poetry/compare/1.8.3...1.8.0)
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3e03697b..0e18fa2d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
         stages: [commit]
 
   - repo: https://github.com/psf/black
-    rev: 24.4.2
+    rev: 24.8.0
     hooks:
       - id: black
         stages: [commit]
@@ -50,7 +50,7 @@ repos:
       - id: no-commit-to-branch
 
   - repo: https://github.com/python-poetry/poetry
-    rev: 1.8.3
+    rev: 1.8.0
     hooks:
       - id: poetry-check
       - id: poetry-export

From 5a7663f9c4ee4f1718c6406cecea111c0b08a2ca Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 23 Jul 2024 21:50:26 +0800
Subject: [PATCH 036/187] =?UTF-8?q?=E2=9C=A8=20add=20functions=20of=20unro?=
 =?UTF-8?q?lling=20pooling=20kernel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/synapses/conv_utils.py | 325 ++++++++++++++---------
 1 file changed, 196 insertions(+), 129 deletions(-)

diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 021220d4..6cadac69 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -9,6 +9,7 @@
 from paibox.types import (
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
+    WEIGHT_DTYPE,
     NeuOutType,
     SynOutType,
     WeightType,
@@ -77,6 +78,7 @@ def _conv1d_unroll(
     zeros_image = np.zeros((cin * il, cout, ol), dtype=kernel.dtype)
 
     for i in range(ol):
+        zeros_image.fill(0)
         for ch_idx in np.ndindex(kernel.shape[:2]):
             # [0] -> o_ch, [1] -> i_ch
             zeros_image[
@@ -116,8 +118,6 @@ def _conv2d_unroll(
 ) -> WeightType:
     """Unroll the kernel of 2d convolution into a matrix."""
     cout, cin, kh, kw = kernel.shape
-
-    # ih, iw = in_shape
     ih = in_shape[0] + 2 * padding[0]
     iw = in_shape[1] + 2 * padding[1]
     oh, ow = out_shape
@@ -144,7 +144,7 @@ def _conv2d_unroll(
                     i * ow + j,
                 ] = kernel[ch_idx[0], ch_idx[1], :, :]
 
-            t = (
+            temp = (
                 zeros_image[:, :, i * ow + j]
                 .reshape(cin * ih, cout, iw)
                 .transpose(1, 0, 2)
@@ -158,7 +158,7 @@ def _conv2d_unroll(
             #     )
 
             for o_ch in range(cout):
-                w_unrolled_np[:, i * ow + j + o_ch * out_size] = t[o_ch].ravel()
+                w_unrolled_np[:, i * ow + j + o_ch * out_size] = temp[o_ch].ravel()
 
     # Remove the part of the padding in the w_unrolled_no_padding
     # That is, remove useless weight in the w_unrolled_no_padding
@@ -217,111 +217,6 @@ def _conv2d_halfroll(
     return w_np
 
 
-def _pool2d_kernel_unroll(
-    channels: int,
-    in_shape: Size2Type,
-    out_shape: Size2Type,
-    ksize: Size2Type,
-    stride: Size2Type,
-    padding: Size2Type,
-    # fm_order: str,
-) -> WeightType:
-    kh, kw = ksize
-    ih = in_shape[0] + 2 * padding[0]
-    iw = in_shape[1] + 2 * padding[1]
-    oh, ow = out_shape
-    in_size = ih * iw
-    out_size = oh * ow
-
-    w_unrolled_np = np.zeros((channels * in_size, channels * out_size), dtype=np.bool_)
-
-    for i in range(oh):
-        for j in range(ow):
-            zeros_image = np.zeros((channels * ih, iw * channels), dtype=np.bool_)
-            for i_ch in range(channels):
-                zeros_image[
-                    (i * stride[0] + i_ch * ih) : (i * stride[0] + i_ch * ih) + kh,
-                    (j * stride[1] + i_ch * iw) : (j * stride[1] + i_ch * iw) + kw,
-                ] = 1
-
-            temp = zeros_image.reshape((channels * ih, channels, iw)).transpose(1, 0, 2)
-
-            for o_ch in range(channels):
-                w_unrolled_np[:, i * ow + j + o_ch * oh * ow] = temp[o_ch].ravel()
-
-    nih, niw = in_shape
-    nin_size = nih * niw
-    w_unrolled = np.zeros((channels * nin_size, channels * out_size), dtype=np.bool_)
-
-    for i in range(channels):
-        for j in range(nih):
-            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
-                w_unrolled_np[
-                    i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1] : i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1]
-                    + niw,
-                    :,
-                ]
-            )
-
-    return w_unrolled
-
-
-def _func_pool2d(
-    x_chw: NeuOutType,
-    out_shape: Size2Type,
-    ksize: Size2Type,
-    stride: Size2Type,
-    padding: Size2Type,
-    type: str,
-    threshold: int,
-) -> NeuOutType:
-    xcin, xh, xw = x_chw.shape
-    kh, kw = ksize
-    oh, ow = out_shape
-    cout = xcin
-
-    assert (xh + padding[0] * 2 - kh) // stride[0] + 1 == oh
-    assert (xw + padding[1] * 2 - kw) // stride[1] + 1 == ow
-
-    out = np.zeros((cout, oh, ow), dtype=np.int32)
-    x_padded = np.pad(
-        x_chw,
-        ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-        mode="constant",
-    )
-
-    for c in range(cout):
-        for i in range(oh):
-            for j in range(ow):
-                if type == "avg":
-                    out[c, i, j] = np.sum(
-                        x_padded[
-                            c,
-                            stride[0] * i : stride[0] * i + kh,
-                            stride[1] * j : stride[1] * j + kw,
-                        ]
-                    )
-                else:
-                    out[c, i, j] = np.max(
-                        x_padded[
-                            c,
-                            stride[0] * i : stride[0] * i + kh,
-                            stride[1] * j : stride[1] * j + kw,
-                        ]
-                    )
-
-    if type == "avg":
-        result = out >= threshold
-    else:
-        result = out
-
-    return result.astype(NEUOUT_U8_DTYPE)
-
-
 def _conv1d_faster(
     x_cl: NeuOutType,
     out_shape: Size1Type,
@@ -329,13 +224,8 @@ def _conv1d_faster(
     stride: Size1Type,
     padding: Size1Type,
 ) -> SynOutType:
-    """Faster 1d convolution.
-
-    XXX: The case where the input feature map is in 'LC' order is not considered for the time being.
-    """
-    xc, xl = x_cl.shape
-    # (O, I, L)
-    cout, cin, kl = kernel.shape
+    """Faster 1d convolution."""
+    cout, cin, kl = kernel.shape  # (O, I, L)
 
     x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])), mode="constant")
 
@@ -361,13 +251,8 @@ def _conv2d_faster(
     padding: Size2Type,
     # fm_order: str,
 ) -> SynOutType:
-    """Faster 2d convolution.
-
-    XXX: The case where the input feature map is in 'HWC' order is not considered for the time being.
-    """
-    xc, xh, xw = x_chw.shape
-    # (O, I, H, W)
-    cout, cin, kh, kw = kernel.shape
+    """Faster 2d convolution."""
+    cout, cin, kh, kw = kernel.shape  # (O, I, H, W)
 
     x_padded = np.pad(
         x_chw,
@@ -407,12 +292,13 @@ def _convtranspose1d_unroll(
     il = in_shape[0] + (in_shape[0] - 1) * (stride[0] - 1) + (kl - 1) * 2
     ol = out_shape[0] + 2 * padding[0] - output_padding[0]
 
-    w_unrolled_np = np.zeros((cin * il, cout * ol), dtype=kernel_flip.dtype)
-    zeros_image = np.zeros((cin * il, cout, ol), dtype=kernel_flip.dtype)
+    w_unrolled_np = np.zeros((cin * il, cout * ol), dtype=kernel.dtype)
+    zeros_image = np.zeros((cin * il, cout, ol), dtype=kernel.dtype)
 
     # stride has been processed in the input matrix
     stride_transpose = 1
     for i in range(ol):
+        zeros_image.fill(0)
         for ch_idx in np.ndindex(kernel_flip.shape[:2]):
             # [0] -> o_ch, [1] -> i_ch
             zeros_image[
@@ -476,17 +362,16 @@ def _convtranspose2d_unroll(
     iw = in_shape[1] + (in_shape[1] - 1) * (stride[1] - 1) + (kw - 1) * 2
     oh = out_shape[0] + 2 * padding[0] - output_padding[0]
     ow = out_shape[1] + 2 * padding[1] - output_padding[1]
-    # ih, iw = in_shape
-    # oh, ow = out_shape
     in_size = ih * iw
     out_size = oh * ow
 
-    w_unrolled_np = np.zeros((cin * in_size, cout * out_size), dtype=kernel_flip.dtype)
-    zeros_image = np.zeros((cin * ih, iw * cout, out_size), dtype=kernel_flip.dtype)
+    w_unrolled_np = np.zeros((cin * in_size, cout * out_size), dtype=kernel.dtype)
+    zeros_image = np.zeros((cin * ih, iw * cout, out_size), dtype=kernel.dtype)
 
     stride_transpose = (1, 1)
     for i in range(oh):
         for j in range(ow):
+            zeros_image.fill(0)
             for ch_idx in np.ndindex(kernel_flip.shape[:2]):
                 # [0] -> o_ch, [1] -> i_ch
                 zeros_image[
@@ -702,3 +587,185 @@ def _2d_im2col(
             idx += 1
 
     return cols
+
+
+def _pool1d_kernel_unroll(
+    channels: int,
+    in_shape: Size1Type,
+    out_shape: Size1Type,
+    ksize: Size1Type,
+    stride: Size1Type,
+    padding: Size1Type,
+) -> WeightType:
+    kl = ksize[0]
+    il = in_shape[0] + 2 * padding[0]
+    ol = out_shape[0]
+
+    w_unrolled_np = np.zeros((channels * il, channels * ol), dtype=WEIGHT_DTYPE)
+    zeros_image = np.zeros((channels * il, channels), dtype=WEIGHT_DTYPE)
+
+    for i in range(ol):
+        zeros_image.fill(0)
+        for i_ch in range(channels):
+            zeros_image[
+                i * stride[0] + i_ch * il : i * stride[0] + i_ch * il + kl, i_ch
+            ] = 1
+
+        temp = zeros_image.T
+
+        for o_ch in range(channels):
+            w_unrolled_np[:, i + o_ch * ol] = temp[o_ch].ravel()
+
+    nil = in_shape[0]
+    w_unrolled = np.zeros((channels * nil, channels * ol), dtype=WEIGHT_DTYPE)
+
+    for i in range(channels):
+        w_unrolled[i * nil : i * nil + nil, :] = w_unrolled_np[
+            i * il + padding[0] : i * il - padding[0] + il, :
+        ]
+
+    return w_unrolled
+
+
+def _pool2d_kernel_unroll(
+    channels: int,
+    in_shape: Size2Type,
+    out_shape: Size2Type,
+    ksize: Size2Type,
+    stride: Size2Type,
+    padding: Size2Type,
+    # fm_order: str,
+) -> WeightType:
+    kh, kw = ksize
+    ih = in_shape[0] + 2 * padding[0]
+    iw = in_shape[1] + 2 * padding[1]
+    oh, ow = out_shape
+    in_size = ih * iw
+    out_size = oh * ow
+
+    w_unrolled_np = np.zeros(
+        (channels * in_size, channels * out_size), dtype=WEIGHT_DTYPE
+    )
+    zeros_image = np.zeros((channels * ih, iw * channels), dtype=WEIGHT_DTYPE)
+
+    for i in range(oh):
+        for j in range(ow):
+            zeros_image.fill(0)
+            for i_ch in range(channels):
+                zeros_image[
+                    i * stride[0] + i_ch * ih : i * stride[0] + i_ch * ih + kh,
+                    j * stride[1] + i_ch * iw : j * stride[1] + i_ch * iw + kw,
+                ] = 1
+
+            temp = zeros_image.reshape((channels * ih, channels, iw)).transpose(1, 0, 2)
+
+            for o_ch in range(channels):
+                w_unrolled_np[:, i * ow + j + o_ch * out_size] = temp[o_ch].ravel()
+
+    nih, niw = in_shape
+    nin_size = nih * niw
+    w_unrolled = np.zeros(
+        (channels * nin_size, channels * out_size), dtype=WEIGHT_DTYPE
+    )
+
+    for i in range(channels):
+        for j in range(nih):
+            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
+                w_unrolled_np[
+                    i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1] : i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1]
+                    + niw,
+                    :,
+                ]
+            )
+
+    return w_unrolled
+
+
+def _func_pool1d(
+    x_cl: NeuOutType,
+    out_shape: Size1Type,
+    ksize: Size1Type,
+    stride: Size1Type,
+    padding: Size1Type,
+    type: str,
+    threshold: int,
+) -> NeuOutType:
+    xcin, xl = x_cl.shape
+    kl = ksize[0]
+    ol = out_shape[0]
+    cout = xcin
+
+    assert (xl + padding[0] * 2 - kl) // stride[0] + 1 == ol
+
+    out = np.zeros((cout, ol), dtype=np.int32)
+    x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])), mode="constant")
+
+    for c in range(cout):
+        for i in range(ol):
+            if type == "avg":
+                out[c, i] = np.sum(x_padded[c, stride[0] * i : stride[0] * i + kl])
+            else:
+                out[c, i] = np.max(x_padded[c, stride[0] * i : stride[0] * i + kl])
+
+    if type == "avg":
+        result = out >= threshold
+    else:
+        result = out
+
+    return result.astype(NEUOUT_U8_DTYPE)
+
+
+def _func_pool2d(
+    x_chw: NeuOutType,
+    out_shape: Size2Type,
+    ksize: Size2Type,
+    stride: Size2Type,
+    padding: Size2Type,
+    type: str,
+    threshold: int,
+) -> NeuOutType:
+    xcin, xh, xw = x_chw.shape
+    kh, kw = ksize
+    oh, ow = out_shape
+    cout = xcin
+
+    assert (xh + padding[0] * 2 - kh) // stride[0] + 1 == oh
+    assert (xw + padding[1] * 2 - kw) // stride[1] + 1 == ow
+
+    out = np.zeros((cout, oh, ow), dtype=np.int32)
+    x_padded = np.pad(
+        x_chw,
+        ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
+        mode="constant",
+    )
+
+    for c in range(cout):
+        for i in range(oh):
+            for j in range(ow):
+                if type == "avg":
+                    out[c, i, j] = np.sum(
+                        x_padded[
+                            c,
+                            stride[0] * i : stride[0] * i + kh,
+                            stride[1] * j : stride[1] * j + kw,
+                        ]
+                    )
+                else:
+                    out[c, i, j] = np.max(
+                        x_padded[
+                            c,
+                            stride[0] * i : stride[0] * i + kh,
+                            stride[1] * j : stride[1] * j + kw,
+                        ]
+                    )
+
+    if type == "avg":
+        result = out >= threshold
+    else:
+        result = out
+
+    return result.astype(NEUOUT_U8_DTYPE)

From 90eb06aa93664d3fa59c9dae8bc24122be0b032f Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 23 Jul 2024 21:51:44 +0800
Subject: [PATCH 037/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20abstract=20the=20'?=
 =?UTF-8?q?=5FConvNdForward'=20class=20to=20reduce=20repetitive=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/synapses/transforms.py | 98 +++++++++---------------
 1 file changed, 35 insertions(+), 63 deletions(-)

diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index d5aa57cc..54710611 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -17,7 +17,7 @@
 )
 from paibox.utils import is_shape, shape2num, typical_round
 
-from .conv_types import Size1Type, Size2Type, SizeAnyType
+from .conv_types import _SizeAnyType, Size1Type, Size2Type, SizeAnyType
 from .conv_utils import (
     _conv1d_faster,
     _conv1d_unroll,
@@ -28,7 +28,9 @@
     _convtranspose1d_unroll,
     _convtranspose2d_faster,
     _convtranspose2d_unroll,
+    _func_pool1d,
     _func_pool2d,
+    _pool1d_kernel_unroll,
     _pool2d_kernel_unroll,
 )
 
@@ -321,24 +323,32 @@ def is_T(self) -> bool:
         return self.axes == (1, 0)
 
 
-class Conv1dForward(Transform):
+class _ConvNdForward(Transform):
     def __init__(
         self,
-        in_shape: Size1Type,
-        out_shape: Size1Type,
+        in_shape: SizeAnyType,
+        out_shape: SizeAnyType,
         kernel: np.ndarray,
-        stride: Size1Type,
-        padding: Size1Type,
-        # fm_order: _Order2d,
+        stride: _SizeAnyType = 0,
+        padding: _SizeAnyType = 0,
+        output_padding: _SizeAnyType = 0,
     ) -> None:
         self.in_shape = in_shape
         self.out_shape = out_shape
         self.stride = stride
         self.padding = padding
-        # self.fm_order = fm_order
+        self.output_padding = output_padding
 
         super().__init__(kernel)
 
+
+class Conv1dForward(_ConvNdForward):
+
+    in_shape: Size1Type
+    out_shape: Size1Type
+    stride: Size1Type
+    padding: Size1Type
+
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
 
@@ -359,23 +369,11 @@ def connectivity(self):
         )
 
 
-class Conv2dForward(Transform):
-    def __init__(
-        self,
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel: np.ndarray,
-        stride: Size2Type,
-        padding: Size2Type,
-        # fm_order: _Order3d,
-    ) -> None:
-        self.in_shape = in_shape
-        self.out_shape = out_shape
-        self.stride = stride
-        self.padding = padding
-        # self.fm_order = fm_order
-
-        super().__init__(kernel)
+class Conv2dForward(_ConvNdForward):
+    in_shape: Size2Type
+    out_shape: Size2Type
+    stride: Size2Type
+    padding: Size2Type
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
@@ -426,25 +424,12 @@ def connectivity(self):
         )
 
 
-class ConvTranspose1dForward(Transform):
-    def __init__(
-        self,
-        in_shape: Size1Type,
-        out_shape: Size1Type,
-        kernel: np.ndarray,
-        stride: Size1Type,
-        padding: Size1Type,
-        output_padding: Size1Type,
-        # fm_order: _Order2d,
-    ) -> None:
-        self.in_shape = in_shape
-        self.out_shape = out_shape
-        self.stride = stride
-        self.padding = padding
-        self.output_padding = output_padding
-        # self.fm_order = fm_order
-
-        super().__init__(kernel)
+class ConvTranspose1dForward(_ConvNdForward):
+    in_shape: Size1Type
+    out_shape: Size1Type
+    stride: Size1Type
+    padding: Size1Type
+    output_padding: Size1Type
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]
@@ -476,25 +461,12 @@ def connectivity(self):
         )
 
 
-class ConvTranspose2dForward(Transform):
-    def __init__(
-        self,
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel: np.ndarray,
-        stride: Size2Type,
-        padding: Size2Type,
-        output_padding: Size2Type,
-        # fm_order: _Order3d,
-    ) -> None:
-        self.in_shape = in_shape
-        self.out_shape = out_shape
-        self.stride = stride
-        self.padding = padding
-        self.output_padding = output_padding
-        # self.fm_order = fm_order
-
-        super().__init__(kernel)
+class ConvTranspose2dForward(_ConvNdForward):
+    in_shape: Size2Type
+    out_shape: Size2Type
+    stride: Size2Type
+    padding: Size2Type
+    output_padding: Size2Type
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1]

From 189f879464d7b702eea91f1f0e2ab16299f6235e Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 23 Jul 2024 21:52:36 +0800
Subject: [PATCH 038/187] =?UTF-8?q?=E2=9C=A8=20pooling=20layer=20is=20abst?=
 =?UTF-8?q?racted=20into=20=5FNdForward=20classes=20&=201/2d=20are=20added?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/synapses/transforms.py | 55 ++++++++++++++++++++----
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 54710611..8ba80523 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -498,16 +498,15 @@ def connectivity(self):
         )
 
 
-class _Pool2dForward(Transform):
+class _PoolNdForward(Transform):
     def __init__(
         self,
         channels: int,
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel_size: Size2Type,
-        stride: Size2Type,
-        padding: Size2Type,
-        # fm_order: _Order3d,
+        in_shape: SizeAnyType,
+        out_shape: SizeAnyType,
+        kernel_size: SizeAnyType,
+        stride: _SizeAnyType,
+        padding: _SizeAnyType,
         pool_type: Literal["avg", "max"],
         threshold: Optional[int] = None,
     ) -> None:
@@ -517,8 +516,8 @@ def __init__(
         self.ksize = kernel_size
         self.stride = stride
         self.padding = padding
-        # self.fm_order = fm_order
         self.pool_type = pool_type
+
         if isinstance(threshold, int):
             self.threshold = threshold
         else:
@@ -526,6 +525,46 @@ def __init__(
 
         super().__init__(1)
 
+
+class _Pool1dForward(_PoolNdForward):
+    in_shape: Size1Type
+    out_shape: Size1Type
+    ksize: Size1Type
+    stride: Size1Type
+    padding: Size1Type
+
+    def __call__(self, x: NeuOutType, *args, **kwargs) -> NeuOutType:
+        _x = x.reshape((self.channels,) + self.in_shape)
+
+        return _func_pool1d(
+            _x,
+            self.out_shape,
+            self.ksize,
+            self.stride,
+            self.padding,
+            self.pool_type,
+            self.threshold,
+        )
+
+    @property
+    def connectivity(self):
+        return _pool1d_kernel_unroll(
+            self.channels,
+            self.in_shape,
+            self.out_shape,
+            self.ksize,
+            self.stride,
+            self.padding,
+        )
+
+
+class _Pool2dForward(_PoolNdForward):
+    in_shape: Size2Type
+    out_shape: Size2Type
+    ksize: Size2Type
+    stride: Size2Type
+    padding: Size2Type
+
     def __call__(self, x: NeuOutType, *args, **kwargs) -> NeuOutType:
         # if self.fm_order == "HWC":
         #     # (N,) -> (H, W, C) -> (C, H, W)

From caa507ea8a2afb2548cbbc99104f7d4d9cad3cfb Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 23 Jul 2024 21:54:07 +0800
Subject: [PATCH 039/187] =?UTF-8?q?=E2=9C=85=20add=201/2d=20pooling=20gold?=
 =?UTF-8?q?en=20reference=20functions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/utils.py | 80 +++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/tests/components/utils.py b/tests/components/utils.py
index 4feabbcf..f84c0cab 100644
--- a/tests/components/utils.py
+++ b/tests/components/utils.py
@@ -2,7 +2,39 @@
 
 import numpy as np
 
-from paibox.types import NeuOutType, SpikeType
+from paibox.types import WEIGHT_DTYPE, NeuOutType, SpikeType
+
+
+def maxpool1d_golden(
+    x: SpikeType,
+    kernel_size: tuple[int],
+    stride: Optional[tuple[int]],
+    padding: tuple[int],
+    fm_order: str,
+) -> SpikeType:
+    if fm_order == "LC":
+        _x = x.T
+    else:
+        _x = x
+
+    xcin, il = _x.shape
+    kl = kernel_size[0]
+    _stride = stride if stride is not None else kernel_size
+    ol = (il - kl + 2 * padding[0]) // _stride[0] + 1
+    cout = xcin
+
+    out = np.zeros((cout, ol), dtype=x.dtype)
+    x_padded = np.pad(
+        _x,
+        ((0, 0), (padding[0], padding[0])),
+        mode="constant",
+    )
+
+    for c in range(cout):
+        for i in range(ol):
+            out[c, i] = np.max(x_padded[c, _stride[0] * i : _stride[0] * i + kl])
+
+    return out
 
 
 def maxpool2d_golden(
@@ -45,10 +77,43 @@ def maxpool2d_golden(
     return out
 
 
+def avgpool1d_golden(
+    x: SpikeType,
+    kernel_size: tuple[int],
+    stride: Optional[tuple[int]],
+    padding: tuple[int],
+    fm_order: str,
+    threshold: int,
+) -> SpikeType:
+    if fm_order == "LC":
+        _x = x.T
+    else:
+        _x = x
+
+    xcin, il = _x.shape
+    kl = kernel_size[0]
+    _stride = stride if stride is not None else kernel_size
+    ol = (il - kl + 2 * padding[0]) // _stride[0] + 1
+    cout = xcin
+
+    out = np.zeros((cout, ol), dtype=WEIGHT_DTYPE)
+    x_padded = np.pad(
+        _x,
+        ((0, 0), (padding[0], padding[0])),
+        mode="constant",
+    )
+
+    for c in range(cout):
+        for i in range(ol):
+            out[c, i] = np.sum(x_padded[c, _stride[0] * i : _stride[0] * i + kl])
+
+    return out >= threshold
+
+
 def avgpool2d_golden(
     x: SpikeType,
     kernel_size: tuple[int, int],
-    stride: tuple[int, int],
+    stride: Optional[tuple[int, int]],
     padding: tuple[int, int],
     fm_order: str,
     threshold: int,
@@ -60,11 +125,12 @@ def avgpool2d_golden(
 
     xcin, ih, iw = _x.shape
     kh, kw = kernel_size
-    oh = (ih - kh + 2 * padding[0]) // stride[0] + 1
-    ow = (iw - kw + 2 * padding[1]) // stride[1] + 1
+    _stride = stride if stride is not None else kernel_size
+    oh = (ih - kh + 2 * padding[0]) // _stride[0] + 1
+    ow = (iw - kw + 2 * padding[1]) // _stride[1] + 1
     cout = xcin
 
-    out = np.zeros((cout, oh, ow), dtype=np.int8)
+    out = np.zeros((cout, oh, ow), dtype=WEIGHT_DTYPE)
     x_padded = np.pad(
         _x,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
@@ -77,8 +143,8 @@ def avgpool2d_golden(
                 out[c, i, j] = np.sum(
                     x_padded[
                         c,
-                        stride[0] * i : stride[0] * i + kh,
-                        stride[1] * j : stride[1] * j + kw,
+                        _stride[0] * i : _stride[0] * i + kh,
+                        _stride[1] * j : _stride[1] * j + kw,
                     ]
                 )
 

From 7a4eaf0375a2fe88c838731ee091ffb986a8813d Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 23 Jul 2024 21:54:31 +0800
Subject: [PATCH 040/187] =?UTF-8?q?=F0=9F=94=A8=20widening=20`snn=5Fen`=20?=
 =?UTF-8?q?argument=20type=20to=20accept=20`SNNModeEnable`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 180dfcd2..3a3779f5 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -422,7 +422,7 @@ def __init__(
         tick_wait_end: int = 0,
         input_width: Union[L[1, 8], InputWidthFormat] = InputWidthFormat.WIDTH_1BIT,
         spike_width: Union[L[1, 8], SpikeWidthFormat] = SpikeWidthFormat.WIDTH_1BIT,
-        snn_en: bool = True,
+        snn_en: Union[bool, SNNModeEnable] = True,
         pool_max: bool = False,
         unrolling_factor: int = 1,
         overflow_strict: bool = False,

From a934a2f174e658160a83ee1bb9fd148667c8690f Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 24 Jul 2024 10:30:02 +0800
Subject: [PATCH 041/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20the=20`?=
 =?UTF-8?q?NeuModule`=20to=20store=20runtime=20mode=20kwds=20&=20utilize?=
 =?UTF-8?q?=20partial=20functions=20for=20setting=20SNN/ANN=20modes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/modules.py      | 31 +++++++++++++++----------------
 paibox/components/neuron/base.py  | 21 +++++++++++----------
 paibox/components/neuron/utils.py | 12 ++++++++++--
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 884f7733..f2857fec 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -1,3 +1,4 @@
+from functools import partial
 import sys
 import typing
 from collections import deque
@@ -6,21 +7,14 @@
 from typing import ClassVar, Literal, Optional, TypeVar, Union
 
 import numpy as np
-from paicorelib import (
-    TM,
-    HwConfig,
-    InputWidthFormat,
-    SNNModeEnable,
-    SpikeWidthFormat,
-    get_core_mode,
-)
+from paicorelib import CoreMode, TM, HwConfig, get_core_mode, SNNModeEnable
 
 from paibox.base import NeuDyn
 from paibox.exceptions import NotSupportedError, RegisterError, ShapeError
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import _input_width_format, _spike_width_format
+from .neuron.utils import _input_width_format, _spike_width_format, _RTModeKwds
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
@@ -97,9 +91,8 @@ class NeuModule(NeuDyn, BuildingModule):
     """#N of outputs."""
     inherent_delay: int = 0
     """Internal delay of the module, relative to the external."""
-    input_width: ClassVar[InputWidthFormat] = InputWidthFormat.WIDTH_1BIT
-    spike_width: ClassVar[SpikeWidthFormat] = SpikeWidthFormat.WIDTH_1BIT
-    snn_en: ClassVar[SNNModeEnable] = SNNModeEnable.ENABLE
+    rt_mode_kwds: _RTModeKwds
+    mode: CoreMode
 
     def __init__(
         self,
@@ -188,7 +181,6 @@ def __init__(
 
         super().__init__(**kwargs, name=name)
 
-        self.mode = get_core_mode(self.input_width, self.spike_width, self.snn_en)
         self.keep_shape = keep_shape
         self._shape_out = shape_out
         self.register_operand(*operands)
@@ -453,14 +445,21 @@ def __init__(
 
 def set_rt_mode(input_width: L[1, 8], spike_width: L[1, 8], snn_en: L[0, 1]):
     def wrapper(cls: type[_T]) -> type[_T]:
-        cls.input_width = _input_width_format(input_width)
-        cls.spike_width = _spike_width_format(spike_width)
-        cls.snn_en = SNNModeEnable(snn_en)
+        iw = _input_width_format(input_width)
+        sw = _spike_width_format(spike_width)
+        sen = SNNModeEnable(snn_en)
+
+        cls.mode = get_core_mode(iw, sw, sen)
+        cls.rt_mode_kwds = {"input_width": iw, "spike_width": sw, "snn_en": sen}
         return cls
 
     return wrapper
 
 
+set_rt_mode_snn = partial(set_rt_mode, input_width=1, spike_width=1, snn_en=1)
+set_rt_mode_ann = partial(set_rt_mode, input_width=8, spike_width=8, snn_en=0)
+
+
 def _shape_check2(
     neuron_a: Union[NeuDyn, InputProj],
     neuron_b: Union[NeuDyn, InputProj],
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 3a3779f5..37d82aef 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -46,6 +46,7 @@
     _mask,
     _spike_width_format,
     vjt_overflow,
+    _RTModeKwds,
 )
 
 __all__ = ["Neuron"]
@@ -56,9 +57,7 @@
 class MetaNeuron:
     """Meta neuron"""
 
-    input_width: InputWidthFormat
-    spike_width: SpikeWidthFormat
-    snn_en: SNNModeEnable
+    rt_mode_kwds: _RTModeKwds
     mode: CoreMode
 
     def __init__(
@@ -89,9 +88,11 @@ def __init__(
         self._shape = as_shape(shape)
         self._n_neuron = shape2num(self._shape)
 
-        self.input_width = input_width
-        self.spike_width = spike_width
-        self.snn_en = snn_en
+        self.rt_mode_kwds = {
+            "input_width": input_width,
+            "spike_width": spike_width,
+            "snn_en": snn_en,
+        }
         self.pool_max = pool_max
         # check whether the mode is valid
         self.mode = get_core_mode(input_width, spike_width, snn_en)
@@ -173,7 +174,7 @@ def _neuronal_charge(
         else:
             _v = incoming_v
 
-        if self.snn_en:
+        if self.rt_mode_kwds["snn_en"]:
             v_charged = vjt_pre + _v
         else:
             # SNN_EN=0, the previous voltage is unused
@@ -198,7 +199,7 @@ def _neuronal_leak(self, vjt: VoltageType) -> VoltageType:
 
                 `vjt` = `vjt` + \sgn{`leak_v`}* `_ld` * `_F`
         """
-        if self.snn_en:
+        if self.rt_mode_kwds["snn_en"]:
             if self.leak_direction is LDM.MODE_FORWARD:
                 _ld = 1
             else:
@@ -364,14 +365,14 @@ def update(
         # 3. Reset. Reset is performed in all modes.
         v_reset = self._neuronal_reset(v_leaked)
 
-        if self.spike_width is SpikeWidthFormat.WIDTH_8BIT:
+        if self.rt_mode_kwds["spike_width"] is SpikeWidthFormat.WIDTH_8BIT:
             # Althought the truncated voltage is of type VOLTAGE_DTYPE, its value <= uint8.
             # The voltage to truncate is the one before neuronal reset.
             v_truncated = self._bit_truncate(v_leaked)
 
         self._aux_post_hook()
 
-        if self.spike_width is SpikeWidthFormat.WIDTH_1BIT:
+        if self.rt_mode_kwds["spike_width"] is SpikeWidthFormat.WIDTH_1BIT:
             # When output width is 1 bit, bit truncation is not performed.
             return spike, v_reset
         else:
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index 479a22c8..96415df7 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -1,8 +1,8 @@
 import warnings
-from typing import Literal, Union
+from typing import Literal, Union, TypedDict
 
 import numpy as np
-from paicorelib import InputWidthFormat, SpikeWidthFormat
+from paicorelib import InputWidthFormat, SpikeWidthFormat, SNNModeEnable
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
     BIT_TRUNCATE_MAX,
@@ -105,3 +105,11 @@ def _get_neu_out_dtype(
         return SPIKE_DTYPE
     else:
         return NEUOUT_U8_DTYPE
+
+
+class _RTModeKwds(TypedDict):
+    """Typed keywords for the runtime mode. Only for checking if necessary."""
+
+    input_width: InputWidthFormat
+    spike_width: SpikeWidthFormat
+    snn_en: SNNModeEnable

From dbb27d4c52a34dde2970b156168c0f31b28370ef Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 24 Jul 2024 11:10:59 +0800
Subject: [PATCH 042/187] =?UTF-8?q?=E2=9C=A8=20Add=201d=20spiking=20poolin?=
 =?UTF-8?q?g=20operators?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py              |  13 +-
 paibox/components/_modules.py   | 392 ++++++++++++++++++++++++++++++++
 paibox/components/functional.py | 275 ++++++++++------------
 3 files changed, 519 insertions(+), 161 deletions(-)
 create mode 100644 paibox/components/_modules.py

diff --git a/paibox/__init__.py b/paibox/__init__.py
index 0540f9f5..2bc5f7fd 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -2,6 +2,8 @@
 
 from .backend import BACKEND_CONFIG as BACKEND_CONFIG
 from .backend import Mapper as Mapper
+
+# Functional modules
 from .components.functional import AvgPool2dSemiMap as AvgPool2dSemiMap
 from .components.functional import BitwiseAND as BitwiseAND
 from .components.functional import BitwiseNOT as BitwiseNOT
@@ -14,18 +16,27 @@
 from .components.functional import Linear as Linear
 from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
 from .components.functional import SpikingAdd as SpikingAdd
+from .components.functional import SpikingAvgPool1d as SpikingAvgPool1d
 from .components.functional import SpikingAvgPool2d as SpikingAvgPool2d
+from .components.functional import SpikingAvgPool1dWithV as SpikingAvgPool1dWithV
 from .components.functional import SpikingAvgPool2dWithV as SpikingAvgPool2dWithV
+from .components.functional import SpikingMaxPool1d as SpikingMaxPool1d
 from .components.functional import SpikingMaxPool2d as SpikingMaxPool2d
 from .components.functional import SpikingSub as SpikingSub
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
+
+# Reduced neurons
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
 from .components.neuron.neurons import SpikingRelu as SpikingRelu
 from .components.neuron.neurons import TonicSpiking as TonicSpiking
+
+# Input projection
 from .components.projection import InputProj as InputProj
+
+# Synapses
 from .components.synapses import ConnType as SynConnType
 from .components.synapses.synapses import Conv1d as Conv1d
 from .components.synapses.synapses import Conv2d as Conv2d
@@ -49,7 +60,7 @@
 from paibox import tools
 
 # Minimum required version of paicorelib
-__plib_minimum_version__ = "1.1.6"
+__plib_minimum_version__ = "1.3.0"
 
 try:
     import paicorelib as plib
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
new file mode 100644
index 00000000..5fc92953
--- /dev/null
+++ b/paibox/components/_modules.py
@@ -0,0 +1,392 @@
+from typing import Literal, Optional, Union
+
+import numpy as np
+from paicorelib import TM
+
+from paibox.base import NeuDyn
+from paibox.network import DynSysGroup
+from paibox.types import NEUOUT_U8_DTYPE, WEIGHT_DTYPE, NeuOutType, VoltageType
+from paibox.utils import arg_check_non_neg, shape2num, typical_round
+
+from .modules import (
+    BuiltComponentType,
+    FunctionalModule,
+    FunctionalModuleWithV,
+    set_rt_mode_snn,
+)
+from .neuron import Neuron
+from .neuron.neurons import *
+from .neuron.utils import vjt_overflow
+from .projection import InputProj
+from .synapses import ConnType, FullConnSyn
+from .synapses.conv_types import _Size1Type, _Size2Type
+from .synapses.conv_utils import _fm_ndim1_check, _fm_ndim2_check, _single, _pair
+from .synapses.transforms import (
+    Conv1dForward,
+    Conv2dForward,
+    _Pool1dForward,
+    _Pool2dForward,
+)
+
+__all__ = [
+    "_SpikingPool1d",
+    "_SpikingPool1dWithV",
+    "_SpikingPool2d",
+    "_SpikingPool2dWithV",
+]
+
+
+@set_rt_mode_snn()
+class _SpikingPool1d(FunctionalModule):
+    inherent_delay = 0
+
+    def __init__(
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size1Type,
+        pool_type: Literal["avg", "max"],
+        stride: Optional[_Size1Type] = None,
+        padding: _Size1Type = 0,
+        threshold: Optional[int] = None,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """Basic 1d spiking pooling."""
+        if pool_type not in ("avg", "max"):
+            raise ValueError("type of pooling must be 'avg' or 'max'.")
+
+        cin, il = _fm_ndim1_check(neuron.shape_out, "CL")
+
+        _ksize = _single(kernel_size)
+        _stride = _single(stride) if stride is not None else _ksize
+        _padding = _single(padding)
+
+        ol = (il + 2 * _padding[0] - _ksize[0]) // _stride[0] + 1
+
+        if keep_shape:
+            shape_out = (cin, ol)
+        else:
+            shape_out = (cin * ol,)
+
+        self.tfm = _Pool1dForward(
+            cin, (il,), (ol,), _ksize, _stride, _padding, pool_type, threshold
+        )
+
+        super().__init__(
+            neuron,
+            shape_out=shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        return self.tfm(x1)
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        if self.tfm.pool_type == "avg":
+            n1_p1d = Neuron(
+                self.shape_out,
+                leak_v=1 - self.tfm.threshold,
+                neg_threshold=0,
+                delay=self.delay_relative,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                keep_shape=self.keep_shape,
+                name=f"n0_{self.name}",
+                **self.rt_mode_kwds,
+            )
+        else:  # "max"
+            n1_p1d = SpikingRelu(
+                self.shape_out,
+                delay=self.delay_relative,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                keep_shape=self.keep_shape,
+                name=f"n0_{self.name}",
+                **self.rt_mode_kwds,
+            )
+
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            n1_p1d,
+            weights=self.tfm.connectivity.astype(np.bool_),
+            conn_type=ConnType.All2All,
+            name=f"s0_{self.name}",
+        )
+
+        generated = [n1_p1d, syn1]
+        self._rebuild_out_intf(network, n1_p1d, *generated, **build_options)
+
+        return generated
+
+
+@set_rt_mode_snn()
+class _SpikingPool1dWithV(FunctionalModuleWithV):
+    inherent_delay = 0
+
+    def __init__(
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size1Type,
+        stride: Optional[_Size1Type] = None,
+        padding: _Size1Type = 0,
+        pos_thres: Optional[int] = None,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """Basic 1d spiking pooling with voltage at the previous timestep."""
+
+        cin, il = _fm_ndim1_check(neuron.shape_out, "CL")
+
+        _ksize = _single(kernel_size)
+        _kernel = np.ones((cin, cin, *_ksize), dtype=WEIGHT_DTYPE)
+        _stride = _single(stride) if stride is not None else _ksize
+        _padding = _single(padding)
+
+        ol = (il + 2 * _padding[0] - _ksize[0]) // _stride[0] + 1
+
+        if keep_shape:
+            shape_out = (cin, ol)
+        else:
+            shape_out = (cin * ol,)
+
+        if isinstance(pos_thres, int):
+            self.pos_thres = arg_check_non_neg(pos_thres, "positive threshold")
+        else:
+            self.pos_thres = typical_round(shape2num(_ksize) / 2)
+
+        self.tfm = Conv1dForward((il,), (ol,), _kernel, _stride, _padding)
+
+        super().__init__(
+            neuron,
+            shape_out=shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageType]:
+        return _spike_func_avg_pool(vjt, self.pos_thres)
+
+    def synaptic_integr(self, x1: NeuOutType, vjt_pre: VoltageType) -> VoltageType:
+        return vjt_overflow(vjt_pre + self.tfm(x1).ravel())
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        n1_p1d = IF(
+            self.shape_out,
+            threshold=self.pos_thres,
+            reset_v=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start,
+            tick_wait_end=self.tick_wait_end,
+            keep_shape=self.keep_shape,
+            name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
+        )
+
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            n1_p1d,
+            weights=self.tfm.connectivity.astype(np.bool_),
+            conn_type=ConnType.All2All,
+            name=f"s0_{self.name}",
+        )
+
+        generated = [n1_p1d, syn1]
+        self._rebuild_out_intf(network, n1_p1d, *generated, **build_options)
+
+        return generated
+
+
+@set_rt_mode_snn()
+class _SpikingPool2d(FunctionalModule):
+    inherent_delay = 0
+
+    def __init__(
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        pool_type: Literal["avg", "max"],
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        threshold: Optional[int] = None,
+        # fm_order: _Order3d = "CHW",
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """Basic 2d spiking pooling."""
+        if pool_type not in ("avg", "max"):
+            raise ValueError("type of pooling must be 'avg' or 'max'.")
+
+        # if fm_order not in ("CHW", "HWC"):
+        #     raise ValueError("feature map order must be 'CHW' or 'HWC'.")
+
+        cin, ih, iw = _fm_ndim2_check(neuron.shape_out, "CHW")
+
+        _ksize = _pair(kernel_size)
+        _stride = _pair(stride) if stride is not None else _ksize
+        _padding = _pair(padding)
+
+        oh = (ih + 2 * _padding[0] - _ksize[0]) // _stride[0] + 1
+        ow = (iw + 2 * _padding[1] - _ksize[1]) // _stride[1] + 1
+
+        if keep_shape:
+            shape_out = (cin, oh, ow)
+        else:
+            shape_out = (cin * oh * ow,)
+
+        self.tfm = _Pool2dForward(
+            cin, (ih, iw), (oh, ow), _ksize, _stride, _padding, pool_type, threshold
+        )
+
+        super().__init__(
+            neuron,
+            shape_out=shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        return self.tfm(x1)
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        if self.tfm.pool_type == "avg":
+            n1_p2d = Neuron(
+                self.shape_out,
+                leak_v=1 - self.tfm.threshold,
+                neg_threshold=0,
+                delay=self.delay_relative,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                keep_shape=self.keep_shape,
+                name=f"n0_{self.name}",
+                **self.rt_mode_kwds,
+            )
+        else:  # "max"
+            n1_p2d = SpikingRelu(
+                self.shape_out,
+                delay=self.delay_relative,
+                tick_wait_start=self.tick_wait_start,
+                tick_wait_end=self.tick_wait_end,
+                keep_shape=self.keep_shape,
+                name=f"n0_{self.name}",
+                **self.rt_mode_kwds,
+            )
+
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            n1_p2d,
+            weights=self.tfm.connectivity.astype(np.bool_),
+            conn_type=ConnType.All2All,
+            name=f"s0_{self.name}",
+        )
+
+        generated = [n1_p2d, syn1]
+        self._rebuild_out_intf(network, n1_p2d, *generated, **build_options)
+
+        return generated
+
+
+@set_rt_mode_snn()
+class _SpikingPool2dWithV(FunctionalModuleWithV):
+    inherent_delay = 0
+
+    def __init__(
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size2Type,
+        stride: Optional[_Size2Type] = None,
+        padding: _Size2Type = 0,
+        pos_thres: Optional[int] = None,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """Basic 2d spiking pooling with voltage at the previous timestep.
+
+        NOTE: This is not a regular average pooling operator. It exists only to correspond to the
+            operators that appear in PAIFLOW.
+        """
+        cin, ih, iw = _fm_ndim2_check(neuron.shape_out, "CHW")
+
+        _ksize = _pair(kernel_size)
+        _kernel = np.ones((cin, cin, *_ksize), dtype=WEIGHT_DTYPE)
+        _stride = _pair(stride) if stride is not None else _ksize
+        _padding = _pair(padding)
+
+        oh = (ih + 2 * _padding[0] - _ksize[0]) // _stride[0] + 1
+        ow = (iw + 2 * _padding[1] - _ksize[1]) // _stride[1] + 1
+
+        if keep_shape:
+            shape_out = (cin, oh, ow)
+        else:
+            shape_out = (cin * oh * ow,)
+
+        if isinstance(pos_thres, int):
+            self.pos_thres = arg_check_non_neg(pos_thres, "positive threshold")
+        else:
+            self.pos_thres = typical_round(shape2num(_ksize) / 2)
+
+        self.tfm = Conv2dForward((ih, iw), (oh, ow), _kernel, _stride, _padding)
+
+        super().__init__(
+            neuron,
+            shape_out=shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageType]:
+        return _spike_func_avg_pool(vjt, self.pos_thres)
+
+    def synaptic_integr(self, x1: NeuOutType, vjt_pre: VoltageType) -> VoltageType:
+        return vjt_overflow(vjt_pre + self.tfm(x1).ravel())
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        n1_p2d = IF(
+            self.shape_out,
+            threshold=self.pos_thres,
+            reset_v=0,
+            neg_threshold=0,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start,
+            tick_wait_end=self.tick_wait_end,
+            keep_shape=self.keep_shape,
+            name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
+        )
+
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            n1_p2d,
+            weights=self.tfm.connectivity.astype(np.bool_),
+            conn_type=ConnType.All2All,
+            name=f"s0_{self.name}",
+        )
+
+        generated = [n1_p2d, syn1]
+        self._rebuild_out_intf(network, n1_p2d, *generated, **build_options)
+
+        return generated
+
+
+def _spike_func_avg_pool(
+    vjt: VoltageType, pos_thres: int
+) -> tuple[NeuOutType, VoltageType]:
+    # Fire
+    thres_mode = np.where(
+        vjt >= pos_thres,
+        TM.EXCEED_POSITIVE,
+        np.where(vjt < 0, TM.EXCEED_NEGATIVE, TM.NOT_EXCEEDED),
+    )
+    spike = thres_mode == TM.EXCEED_POSITIVE
+    # Reset
+    v_reset = np.where(thres_mode == TM.EXCEED_POSITIVE, 0, vjt)
+
+    return spike.astype(NEUOUT_U8_DTYPE), v_reset
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index f04719a9..584f6aa3 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -2,7 +2,7 @@
 import sys
 from collections.abc import Sequence
 from functools import partial
-from typing import ClassVar, Literal, Optional, Union
+from typing import ClassVar, Optional, Union
 
 import numpy as np
 from paicorelib import NTM, RM, TM
@@ -20,31 +20,25 @@
     VoltageType,
     WeightType,
 )
-from paibox.utils import (
-    arg_check_non_neg,
-    arg_check_pos,
-    as_shape,
-    shape2num,
-    typical_round,
-)
+from paibox.utils import arg_check_pos, as_shape, shape2num
 
+from ._modules import *
 from .modules import (
     BuiltComponentType,
     FunctionalModule,
     FunctionalModule2to1,
     FunctionalModule2to1WithV,
-    FunctionalModuleWithV,
     TransposeModule,
     set_rt_mode,
+    set_rt_mode_snn,
 )
 from .neuron import Neuron
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, Conv2dHalfRollSyn, FullConnSyn, MaxPool2dSemiMapSyn
-from .synapses.conv_types import _Size2Type
+from .synapses import ConnType, FullConnSyn
+from .synapses.conv_types import _Size1Type, _Size2Type
 from .synapses.conv_utils import _fm_ndim2_check, _pair
-from .synapses.transforms import Conv2dForward, _Pool2dForward
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
@@ -58,6 +52,9 @@
     "BitwiseXOR",
     "DelayChain",
     "SpikingAdd",
+    "SpikingAvgPool1d",
+    "SpikingAvgPool1dWithV",
+    "SpikingMaxPool1d",
     "SpikingAvgPool2d",
     "SpikingAvgPool2dWithV",
     "SpikingMaxPool2d",
@@ -73,7 +70,7 @@
 ]
 
 
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class BitwiseAND(FunctionalModule2to1):
     inherent_delay = 0
 
@@ -120,6 +117,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(
@@ -143,7 +141,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class BitwiseNOT(FunctionalModule):
     inherent_delay = 0
 
@@ -189,6 +187,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(
@@ -205,7 +204,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class BitwiseOR(FunctionalModule2to1):
     inherent_delay = 0
 
@@ -239,6 +238,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(
@@ -262,7 +262,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class BitwiseXOR(FunctionalModule2to1):
     inherent_delay = 1
 
@@ -300,6 +300,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=False,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         identity = np.identity(self.num_out, dtype=np.int8)
@@ -328,6 +329,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n1_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         # weight of syn3, identity matrix with shape (2N, N)
@@ -368,12 +370,7 @@ def __init__(
         else:
             shape_out = (neuron.num_out,)
 
-        if chain_level < 1:
-            raise ValueError(
-                f"the level of delay chain must be positive, but got {chain_level}."
-            )
-
-        self.chain_level = chain_level
+        self.chain_level = arg_check_pos(chain_level, "chain level")
         self.inherent_delay = chain_level - 1
 
         super().__init__(
@@ -399,6 +396,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
                 tick_wait_end=self.tick_wait_end,
                 delay=1,
                 name=f"n{i}_{self.name}",
+                **self.rt_mode_kwds,
             )
             n_delaychain.append(n_delay)
 
@@ -409,6 +407,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             delay=self.delay_relative,
             name=f"n{i + 1}_{self.name}",
+            **self.rt_mode_kwds,
         )
         n_delaychain.append(n_out)  # Must append to the last.
 
@@ -437,7 +436,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class SpikingAdd(FunctionalModule2to1WithV):
     inherent_delay = 0
 
@@ -499,6 +498,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(
@@ -522,173 +522,111 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(1, 1, 1)
-class _SpikingPool2dWithV(FunctionalModuleWithV):
-    inherent_delay = 0
-
+class SpikingAvgPool1d(_SpikingPool1d):
     def __init__(
         self,
         neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
-        pos_thres: Optional[int] = None,
+        kernel_size: _Size1Type,
+        stride: Optional[_Size1Type] = None,
+        padding: _Size1Type = 0,
+        threshold: Optional[int] = None,
+        *,
         keep_shape: bool = True,
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """Basic 2d spiking pooling."""
-        # C,H,W
-        cin, ih, iw = _fm_ndim2_check(neuron.shape_out, "CHW")
-
-        _ksize = _pair(kernel_size)
-        _kernel = np.ones((cin, cin, *_ksize), dtype=np.int8)
-        _stride = _pair(stride) if stride is not None else _ksize
-        _padding = _pair(padding)
-
-        oh = (ih + 2 * _padding[0] - _ksize[0]) // _stride[0] + 1
-        ow = (iw + 2 * _padding[1] - _ksize[1]) // _stride[1] + 1
-
-        if keep_shape:
-            shape_out = (cin, oh, ow)
-        else:
-            shape_out = (cin * oh * ow,)
-
-        if isinstance(pos_thres, int):
-            self.pos_thres = arg_check_non_neg(pos_thres, "positive threshold")
-        else:
-            self.pos_thres = typical_round(shape2num(_ksize) / 2)
+        """1d average pooling for spike. The input feature map is in 'CL' order by default.
 
-        self.tfm = Conv2dForward((ih, iw), (oh, ow), _kernel, _stride, _padding)
+        Args:
+            - neuron: the target neuron to be pooled.
+            - kernel_size: the size of the window.
+            - stride: the stride of the window. Default value is `kernel_size`.
+            - padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 1  \
+                integer.
+            - threshold: if specified, the pooling result is o = (sum of the pooling window >= threshold).  \
+                Otherwise the threshold is kernel_size // 2.
 
+        NOTE: the inherent delay of the module is 0.
+        """
         super().__init__(
             neuron,
-            shape_out=shape_out,
-            keep_shape=keep_shape,
-            name=name,
+            kernel_size,
+            "avg",
+            stride,
+            padding,
+            threshold,
+            keep_shape,
+            name,
             **kwargs,
         )
 
-    def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageType]:
-        return _spike_func_avg_pool(vjt, self.pos_thres)
-
-    def synaptic_integr(self, x1: NeuOutType, vjt_pre: VoltageType) -> VoltageType:
-        return vjt_overflow(vjt_pre + self.tfm(x1).ravel())
-
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        n1_ap2d = IF(
-            self.shape_out,
-            threshold=self.pos_thres,
-            reset_v=0,
-            delay=self.delay_relative,
-            tick_wait_start=self.tick_wait_start,
-            tick_wait_end=self.tick_wait_end,
-            keep_shape=self.keep_shape,
-            name=f"n0_{self.name}",
-        )
-
-        syn1 = FullConnSyn(
-            self.module_intf.operands[0],
-            n1_ap2d,
-            weights=self.tfm.connectivity.astype(np.bool_),
-            conn_type=ConnType.All2All,
-            name=f"s0_{self.name}",
-        )
-
-        generated = [n1_ap2d, syn1]
-        self._rebuild_out_intf(network, n1_ap2d, *generated, **build_options)
-
-        return generated
-
-
-@set_rt_mode(1, 1, 1)
-class _SpikingPool2d(FunctionalModule):
-    inherent_delay = 0
 
+class SpikingAvgPool1dWithV(_SpikingPool1dWithV):
     def __init__(
         self,
         neuron: Union[NeuDyn, InputProj],
-        kernel_size: _Size2Type,
-        pool_type: Literal["avg", "max"],
-        stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
+        kernel_size: _Size1Type,
+        stride: Optional[_Size1Type] = None,
+        padding: _Size1Type = 0,
         threshold: Optional[int] = None,
-        # fm_order: _Order3d = "CHW",
+        *,
         keep_shape: bool = True,
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """Basic 2d spiking pooling."""
-        if pool_type not in ("avg", "max"):
-            raise ValueError("type of pooling must be 'avg' or 'max'.")
-
-        # if fm_order not in ("CHW", "HWC"):
-        #     raise ValueError("feature map order must be 'CHW' or 'HWC'.")
+        """1d average pooling for spike with voltage at the previous timestep. The input feature map is in  \
+            'CL' order by default.
 
-        # C,H,W
-        cin, ih, iw = _fm_ndim2_check(neuron.shape_out, "CHW")
+        Args:
+            - neuron: the target neuron to be pooled.
+            - kernel_size: the size of the window.
+            - stride: the stride of the window. Default value is `kernel_size`.
+            - padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 1  \
+                integer.
+            - threshold: if specified, the pooling result is o = (sum of the pooling window >= threshold).  \
+                Otherwise the threshold is kernel_size // 2.
 
-        _ksize = _pair(kernel_size)
-        _stride = _pair(stride) if stride is not None else _ksize
-        _padding = _pair(padding)
+        NOTE: the inherent delay of the module is 0.
+        """
+        super().__init__(
+            neuron, kernel_size, stride, padding, threshold, keep_shape, name, **kwargs
+        )
 
-        oh = (ih + 2 * _padding[0] - _ksize[0]) // _stride[0] + 1
-        ow = (iw + 2 * _padding[1] - _ksize[1]) // _stride[1] + 1
 
-        if keep_shape:
-            shape_out = (cin, oh, ow)
-        else:
-            shape_out = (cin * oh * ow,)
+class SpikingMaxPool1d(_SpikingPool1d):
+    def __init__(
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        kernel_size: _Size1Type,
+        stride: Optional[_Size1Type] = None,
+        padding: _Size1Type = 0,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """1d max pooling for spike. The input feature map is in 'CL' order by default.
 
-        self.tfm = _Pool2dForward(
-            cin, (ih, iw), (oh, ow), _ksize, _stride, _padding, pool_type, threshold
-        )
+        Args:
+            - neuron: the target neuron to be pooled.
+            - kernel_size: the size of the window to take a max over.
+            - stride: the stride of the window. Default value is `kernel_size`.
+            - padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 1  \
+                integer.
 
+        NOTE: the inherent delay of the module is 0.
+        """
         super().__init__(
             neuron,
-            shape_out=shape_out,
+            kernel_size,
+            "max",
+            stride,
+            padding,
             keep_shape=keep_shape,
             name=name,
             **kwargs,
         )
 
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        return self.tfm(x1)
-
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        if self.tfm.pool_type == "avg":
-            n1_p2d = Neuron(
-                self.shape_out,
-                leak_v=1 - self.tfm.threshold,
-                neg_threshold=0,
-                delay=self.delay_relative,
-                tick_wait_start=self.tick_wait_start,
-                tick_wait_end=self.tick_wait_end,
-                keep_shape=self.keep_shape,
-            )
-        else:  # "max"
-            n1_p2d = SpikingRelu(
-                self.shape_out,
-                delay=self.delay_relative,
-                tick_wait_start=self.tick_wait_start,
-                tick_wait_end=self.tick_wait_end,
-                keep_shape=self.keep_shape,
-                name=f"n0_{self.name}",
-            )
-
-        syn1 = FullConnSyn(
-            self.module_intf.operands[0],
-            n1_p2d,
-            weights=self.tfm.connectivity.astype(np.bool_),
-            conn_type=ConnType.All2All,
-            name=f"s0_{self.name}",
-        )
-
-        generated = [n1_p2d, syn1]
-        self._rebuild_out_intf(network, n1_p2d, *generated, **build_options)
-
-        return generated
-
 
 class SpikingAvgPool2d(_SpikingPool2d):
     def __init__(
@@ -708,11 +646,11 @@ def __init__(
 
         Args:
             - neuron: the target neuron to be pooled.
-            - kernel_size: the size of the window to take a max over.
+            - kernel_size: the size of the window.
             - stride: the stride of the window. Default value is `kernel_size`.
             - padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2  \
                 integers.
-            - threshold: if specified, the pooling result is o = (sum of the pooling window > threshold).   \
+            - threshold: if specified, the pooling result is o = (sum of the pooling window >= threshold).  \
                 Otherwise the threshold is kernel_size // 2.
 
         NOTE: the inherent delay of the module is 0.
@@ -743,6 +681,20 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
+        """2d average pooling for spike with voltage at the previous timestep. The input feature map is in  \
+            'CHW' order by default.
+
+        Args:
+            - neuron: the target neuron to be pooled.
+            - kernel_size: the size of the window.
+            - stride: the stride of the window. Default value is `kernel_size`.
+            - padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2  \
+                integers.
+            - threshold: if specified, the pooling result is o = (sum of the pooling window >= threshold).  \
+                Otherwise the threshold is kernel_size // 2.
+
+        NOTE: the inherent delay of the module is 0.
+        """
         super().__init__(
             neuron, kernel_size, stride, padding, threshold, keep_shape, name, **kwargs
         )
@@ -768,7 +720,7 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """2d max pooling for spike.
+        """2d max pooling for spike. The input feature map is in 'CHW' order by default.
 
         Args:
             - neuron: the target neuron to be pooled.
@@ -791,7 +743,7 @@ def __init__(
         )
 
 
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class SpikingSub(FunctionalModule2to1WithV):
     inherent_delay = 0
     factor_a: int = 1
@@ -843,6 +795,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(
@@ -870,7 +823,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
     "'Transpose2d' will be removed in a future version. Use 'MatMul2d' instead.",
     category=PAIBoxDeprecationWarning,
 )
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class Transpose2d(TransposeModule):
     def __init__(
         self,
@@ -909,6 +862,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(
@@ -929,7 +883,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
     "'Transpose3d' will be removed in a future version. Use 'MatMul2d' instead.",
     category=PAIBoxDeprecationWarning,
 )
-@set_rt_mode(1, 1, 1)
+@set_rt_mode_snn()
 class Transpose3d(TransposeModule):
     def __init__(
         self,
@@ -973,6 +927,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             tick_wait_end=self.tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"n0_{self.name}",
+            **self.rt_mode_kwds,
         )
 
         syn1 = FullConnSyn(

From 0b577b55b3474d0d964a181b4e22c1eb69e98d8d Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 24 Jul 2024 11:18:16 +0800
Subject: [PATCH 043/187] =?UTF-8?q?=E2=9C=85=20add=20test=20cases=20for=20?=
 =?UTF-8?q?1d=20spiking=20pooling=20operators?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 173 ++++++++++++++++++++++++++--
 tests/shared_networks.py            |  81 ++++++++-----
 2 files changed, 220 insertions(+), 34 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 167188bb..135d96cc 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -4,11 +4,19 @@
 import paibox as pb
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
-from paibox.components.synapses.conv_utils import _pair
+from paibox.components.synapses.conv_utils import _single, _pair
 from paibox.network import DynSysGroup
 from paibox.simulator.utils import _conv2d_faster_fp32
 from paibox.utils import as_shape, shape2num, typical_round
-from tests.components.utils import avg_pooling, max_pooling
+
+from .utils import (
+    avgpool1d_golden,
+    avgpool2d_golden,
+    maxpool1d_golden,
+    maxpool2d_golden,
+    avg_pooling,
+    max_pooling,
+)
 
 
 def _assert_build_fmodule(
@@ -383,6 +391,106 @@ def test_SpikingSub_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.parametrize(
+        "shape, channels, ksize, stride, padding, threshold, fm_order, pool_type, p_binomial",
+        [
+            ((24,), 3, (3,), 3, 0, None, "CL", "avg", 0.7),
+            ((12,), 1, (2,), None, 0, None, "CL", "avg", 0.5),
+            ((32,), 8, (3,), None, 0, 3, "CL", "avg", 0.6),
+            ((16,), 8, (5,), (2,), 0, 16, "CL", "avg", 0.7),
+            ((32,), 3, (3,), 2, 0, None, "CL", "max", 0.5),
+            ((24,), 1, (2,), None, 0, None, "CL", "max", 0.4),
+            ((16,), 8, (5,), (2,), 0, None, "CL", "max", 0.6),
+            ((32,), 8, (3,), (3,), 0, None, "CL", "max", 0.3),
+            ((24,), 3, (3,), 3, 1, 4, "CL", "avg", 0.6),
+            ((12,), 1, (2,), None, (1,), None, "CL", "avg", 0.5),
+            ((32,), 8, (3,), None, 2, None, "CL", "avg", 0.5),
+            ((16,), 8, (5,), (2,), (2,), 12, "CL", "avg", 0.4),
+            ((32,), 3, (3,), 2, 1, None, "CL", "max", 0.6),
+            ((24,), 1, (2,), None, 2, None, "CL", "max", 0.7),
+            ((16,), 8, (5,), (2,), (1,), None, "CL", "max", 0.5),
+            ((32,), 8, (3,), (3,), (1,), None, "CL", "max", 0.3),
+        ],
+    )
+    def test_SpikingPool1d(
+        self,
+        shape,
+        channels,
+        ksize,
+        stride,
+        padding,
+        threshold,
+        fm_order,
+        pool_type,
+        p_binomial,
+    ):
+        from tests.shared_networks import SpikingPool1d_Net
+
+        if fm_order == "CL":
+            fm_shape = (channels,) + shape
+        else:
+            fm_shape = shape + (channels,)
+
+        net1 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
+        net2 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
+        p1d = net2.pool
+        generated = DynSysGroup.build_fmodule(net2)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
+        sim2 = pb.Simulator(net2, start_time_zero=False)
+
+        probe_p1d = pb.Probe(generated[p1d][0], "spike")
+        sim2.add_probe(probe_p1d)
+
+        # Use binomial distribution to generate a sparse matrix with more zeros
+        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+
+        for i in range(20):
+            pb.FRONTEND_ENV.save(data1=inpa[i])
+            sim1.run(1)
+            sim2.run(1)
+
+        _stride = _single(stride) if stride is not None else ksize
+        _padding = _single(padding)
+        if isinstance(threshold, int):
+            _threshold = threshold
+        else:
+            _threshold = typical_round(shape2num(ksize) / 2)
+
+        for i in range(1, 20):
+            if pool_type == "avg":
+                expected = avgpool1d_golden(
+                    inpa[i - 1], ksize, _stride, _padding, fm_order, _threshold
+                ).ravel()
+            else:
+                expected = maxpool1d_golden(
+                    inpa[i - 1], ksize, _stride, _padding, fm_order
+                ).ravel()
+
+            assert np.array_equal(sim1.data[net1.probe2][i], expected)
+            assert np.array_equal(sim2.data[probe_p1d][i], expected)
+
+        for i in range(2, 20):
+            if pool_type == "avg":
+                expected = avgpool1d_golden(
+                    inpa[i - 2], ksize, _stride, _padding, fm_order, _threshold
+                ).ravel()
+            else:
+                expected = maxpool1d_golden(
+                    inpa[i - 2], ksize, _stride, _padding, fm_order
+                ).ravel()
+
+            assert np.array_equal(sim1.data[net1.probe3][i], expected)
+
+    def test_SpikingPool1d_mapping(self, ensure_dump_dir):
+        from tests.shared_networks import SpikingPool1d_Net
+
+        net1 = SpikingPool1d_Net((3, 24), (3,), None, 0, None, "avg")
+
+        mapper = pb.Mapper()
+        mapper.build(net1)
+        mapper.compile()
+        mapper.export(fp=ensure_dump_dir)
+
     @pytest.mark.parametrize(
         "shape, channels, ksize, stride, padding, threshold, fm_order, pool_type, p_binomial",
         [
@@ -426,8 +534,6 @@ def test_SpikingPool2d(
     ):
         from tests.shared_networks import SpikingPool2d_Net
 
-        from .utils import avgpool2d_golden, maxpool2d_golden
-
         if fm_order == "CHW":
             fm_shape = (channels,) + shape
         else:
@@ -435,7 +541,7 @@ def test_SpikingPool2d(
 
         net1 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
         net2 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
-        p2d = net2.pool2d
+        p2d = net2.pool
         generated = DynSysGroup.build_fmodule(net2)
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
@@ -493,6 +599,59 @@ def test_SpikingPool2d_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.parametrize(
+        "shape, channels, ksize, stride, padding, threshold, p_binomial",
+        [
+            ((24,), 3, (3,), 3, 0, None, 0.5),
+            ((12,), 1, (3,), None, 0, None, 0.7),
+            ((32,), 8, (3,), None, 0, 3, 0.8),
+            ((16,), 8, (5,), 5, 0, 16, 0.5),
+            ((24,), 3, (3,), 3, 1, 4, 0.6),
+            ((12,), 1, (3,), None, (1,), None, 0.7),
+            ((32,), 8, (3,), None, 2, None, 0.7),
+            ((16,), 8, (5,), 3, (2,), 12, 0.5),
+        ],
+    )
+    def test_SpikingAvgPool1dWithV(
+        self, shape, channels, ksize, stride, padding, threshold, p_binomial
+    ):
+        """NOTE: This function is a native implementation of SNNs and is therefore not  \
+            compared to the ANN implementation."""
+        from tests.shared_networks import SpikingPool1d_Net
+
+        fm_shape = (channels,) + shape
+
+        net1 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
+        net2 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
+        p1d = net2.pool
+        generated = DynSysGroup.build_fmodule(net2)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
+        sim2 = pb.Simulator(net2, start_time_zero=False)
+
+        probe_p1d = pb.Probe(generated[p1d][0], "spike")
+        sim2.add_probe(probe_p1d)
+
+        # Use binomial distribution to generate a sparse matrix with more zeros
+        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+
+        for i in range(20):
+            pb.FRONTEND_ENV.save(data1=inpa[i])
+            sim1.run(1)
+            sim2.run(1)
+
+        for i in range(1, 20):
+            assert np.array_equal(sim1.data[net1.probe2][i], sim2.data[probe_p1d][i])
+
+    def test_SpikingAvgPool1dWithV_mapping(self, ensure_dump_dir):
+        from tests.shared_networks import SpikingPool1d_Net
+
+        net1 = SpikingPool1d_Net((3, 24), (3,), None, 0, None, "avgv")
+
+        mapper = pb.Mapper()
+        mapper.build(net1)
+        mapper.compile()
+        mapper.export(fp=ensure_dump_dir)
+
     @pytest.mark.parametrize(
         "shape, channels, ksize, stride, padding, threshold, p_binomial",
         [
@@ -524,7 +683,7 @@ def test_SpikingAvgPool2dWithV(
 
         net1 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
         net2 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
-        p2d = net2.pool2d
+        p2d = net2.pool
         generated = DynSysGroup.build_fmodule(net2)
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
@@ -543,7 +702,7 @@ def test_SpikingAvgPool2dWithV(
         for i in range(1, 20):
             assert np.array_equal(sim1.data[net1.probe2][i], sim2.data[probe_p2d][i])
 
-    def test_SpikingPool2dWithV_mapping(self, ensure_dump_dir):
+    def test_SpikingAvgPool2dWithV_mapping(self, ensure_dump_dir):
         from tests.shared_networks import SpikingPool2d_Net
 
         net1 = SpikingPool2d_Net((3, 24, 24), (3, 3), None, 0, None, "avgv")
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 77610ec2..6986efca 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -85,6 +85,15 @@ def __init__(self):
         self.s1 = pb.FullConn(self.n1, self.n2)
 
 
+_2to1_op = {
+    "and": pb.BitwiseAND,
+    "or": pb.BitwiseOR,
+    "xor": pb.BitwiseXOR,
+    "add": pb.SpikingAdd,
+    "sub": pb.SpikingSub,
+}
+
+
 class FunctionalModule_2to1_Net(pb.DynSysGroup):
     def __init__(self, op: Literal["and", "or", "xor", "add", "sub"]):
         super().__init__()
@@ -97,16 +106,7 @@ def __init__(self, op: Literal["and", "or", "xor", "add", "sub"]):
         self.s1 = pb.FullConn(self.inp1, self.n1, conn_type=pb.SynConnType.One2One)
         self.s2 = pb.FullConn(self.inp2, self.n2, conn_type=pb.SynConnType.One2One)
 
-        if op == "and":
-            self.func_node = pb.BitwiseAND(self.n1, self.n2, delay=1, tick_wait_start=2)
-        elif op == "or":
-            self.func_node = pb.BitwiseOR(self.n1, self.n2, delay=1, tick_wait_start=2)
-        elif op == "xor":
-            self.func_node = pb.BitwiseXOR(self.n1, self.n2, delay=1, tick_wait_start=2)
-        elif op == "add":
-            self.func_node = pb.SpikingAdd(self.n1, self.n2, delay=1, tick_wait_start=2)
-        elif op == "sub":
-            self.func_node = pb.SpikingSub(self.n1, self.n2, delay=1, tick_wait_start=2)
+        self.func_node = _2to1_op[op](self.n1, self.n2, delay=1, tick_wait_start=2)
 
         self.n3 = pb.SpikingRelu(
             (10,),
@@ -155,34 +155,61 @@ def __init__(self, op: Literal["not", "delay"]):
             self.probe4 = pb.Probe(self.func_node, "voltage")
 
 
-class SpikingPool2d_Net(pb.DynSysGroup):
-    def __init__(self, fm_shape, ksize, stride, padding, threshold, pool_type):
+_pool_op = {
+    (1, "avg"): pb.SpikingAvgPool1d,
+    (1, "avgv"): pb.SpikingAvgPool1dWithV,
+    (2, "avg"): pb.SpikingAvgPool2d,
+    (2, "avgv"): pb.SpikingAvgPool2dWithV,
+    (1, "max"): pb.SpikingMaxPool1d,
+    (2, "max"): pb.SpikingMaxPool2d,
+}
+
+
+class _SpikingPoolNd_Net(pb.DynSysGroup):
+    def __init__(
+        self, pool_ndim, fm_shape, ksize, stride, padding, threshold, pool_type
+    ):
         super().__init__()
         self.inp1 = pb.InputProj(input=_out_bypass1, shape_out=fm_shape)
         self.n1 = pb.SpikingRelu(fm_shape, tick_wait_start=1)
         self.s1 = pb.FullConn(self.inp1, self.n1, conn_type=pb.SynConnType.One2One)
 
-        if pool_type == "avg":
-            self.pool2d = pb.SpikingAvgPool2d(
-                self.n1, ksize, stride, padding, threshold, delay=1, tick_wait_start=2
-            )
-        elif pool_type == "avgv":
-            self.pool2d = pb.SpikingAvgPool2dWithV(
-                self.n1, ksize, stride, padding, threshold, delay=1, tick_wait_start=2
-            )
-        else:  # "max"
-            self.pool2d = pb.SpikingMaxPool2d(
-                self.n1, ksize, stride, padding, delay=1, tick_wait_start=2
-            )
+        self.pool = _pool_op[(pool_ndim, pool_type)](
+            self.n1,
+            ksize,
+            stride,
+            padding,
+            threshold=threshold,  # no need for maxpool
+            delay=1,
+            tick_wait_start=2,
+        )
 
-        self.n2 = pb.SpikingRelu(self.pool2d.shape_out, delay=1, tick_wait_start=3)
-        self.s3 = pb.FullConn(self.pool2d, self.n2, conn_type=pb.SynConnType.One2One)
+        self.n2 = pb.SpikingRelu(self.pool.shape_out, delay=1, tick_wait_start=3)
+        self.s3 = pb.FullConn(self.pool, self.n2, conn_type=pb.SynConnType.One2One)
 
         self.probe1 = pb.Probe(self.n1, "spike")
-        self.probe2 = pb.Probe(self.pool2d, "spike")
+        self.probe2 = pb.Probe(self.pool, "spike")
         self.probe3 = pb.Probe(self.n2, "spike")
 
 
+class SpikingPool1d_Net(_SpikingPoolNd_Net):
+    pool_ndim = 1
+
+    def __init__(self, fm_shape, ksize, stride, padding, threshold, pool_type):
+        super().__init__(
+            self.pool_ndim, fm_shape, ksize, stride, padding, threshold, pool_type
+        )
+
+
+class SpikingPool2d_Net(_SpikingPoolNd_Net):
+    pool_ndim = 2
+
+    def __init__(self, fm_shape, ksize, stride, padding, threshold, pool_type):
+        super().__init__(
+            self.pool_ndim, fm_shape, ksize, stride, padding, threshold, pool_type
+        )
+
+
 class TransposeModule_T2d_Net(pb.DynSysGroup):
     def __init__(self, shape):
         super().__init__()

From bbd513a0405deea57d13aa75d369a24de75effdc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 26 Aug 2024 09:46:33 +0000
Subject: [PATCH 044/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/__init__.py                       | 2 +-
 paibox/components/_modules.py            | 6 +++---
 paibox/components/modules.py             | 6 +++---
 paibox/components/neuron/base.py         | 2 +-
 paibox/components/neuron/utils.py        | 4 ++--
 paibox/components/synapses/transforms.py | 2 +-
 tests/components/test_functional.py      | 6 +++---
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index 2bc5f7fd..d776f414 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -17,8 +17,8 @@
 from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
 from .components.functional import SpikingAdd as SpikingAdd
 from .components.functional import SpikingAvgPool1d as SpikingAvgPool1d
-from .components.functional import SpikingAvgPool2d as SpikingAvgPool2d
 from .components.functional import SpikingAvgPool1dWithV as SpikingAvgPool1dWithV
+from .components.functional import SpikingAvgPool2d as SpikingAvgPool2d
 from .components.functional import SpikingAvgPool2dWithV as SpikingAvgPool2dWithV
 from .components.functional import SpikingMaxPool1d as SpikingMaxPool1d
 from .components.functional import SpikingMaxPool2d as SpikingMaxPool2d
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 5fc92953..f1847096 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -20,7 +20,7 @@
 from .projection import InputProj
 from .synapses import ConnType, FullConnSyn
 from .synapses.conv_types import _Size1Type, _Size2Type
-from .synapses.conv_utils import _fm_ndim1_check, _fm_ndim2_check, _single, _pair
+from .synapses.conv_utils import _fm_ndim1_check, _fm_ndim2_check, _pair, _single
 from .synapses.transforms import (
     Conv1dForward,
     Conv2dForward,
@@ -307,8 +307,8 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """Basic 2d spiking pooling with voltage at the previous timestep. 
-        
+        """Basic 2d spiking pooling with voltage at the previous timestep.
+
         NOTE: This is not a regular average pooling operator. It is just to correspond to the operators \
             that appear in PAIFLOW.
         """
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index f2857fec..09bdf247 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -1,20 +1,20 @@
-from functools import partial
 import sys
 import typing
 from collections import deque
 from collections.abc import Sequence
 from dataclasses import dataclass, field
+from functools import partial
 from typing import ClassVar, Literal, Optional, TypeVar, Union
 
 import numpy as np
-from paicorelib import CoreMode, TM, HwConfig, get_core_mode, SNNModeEnable
+from paicorelib import TM, CoreMode, HwConfig, SNNModeEnable, get_core_mode
 
 from paibox.base import NeuDyn
 from paibox.exceptions import NotSupportedError, RegisterError, ShapeError
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import _input_width_format, _spike_width_format, _RTModeKwds
+from .neuron.utils import _input_width_format, _RTModeKwds, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 37d82aef..ad47b84f 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -44,9 +44,9 @@
     _input_width_format,
     _leak_v_check,
     _mask,
+    _RTModeKwds,
     _spike_width_format,
     vjt_overflow,
-    _RTModeKwds,
 )
 
 __all__ = ["Neuron"]
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index 96415df7..1d9ea2ff 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -1,8 +1,8 @@
 import warnings
-from typing import Literal, Union, TypedDict
+from typing import Literal, TypedDict, Union
 
 import numpy as np
-from paicorelib import InputWidthFormat, SpikeWidthFormat, SNNModeEnable
+from paicorelib import InputWidthFormat, SNNModeEnable, SpikeWidthFormat
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
     BIT_TRUNCATE_MAX,
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 8ba80523..338b3ac5 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -17,7 +17,7 @@
 )
 from paibox.utils import is_shape, shape2num, typical_round
 
-from .conv_types import _SizeAnyType, Size1Type, Size2Type, SizeAnyType
+from .conv_types import Size1Type, Size2Type, SizeAnyType, _SizeAnyType
 from .conv_utils import (
     _conv1d_faster,
     _conv1d_unroll,
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 135d96cc..8f744409 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -4,18 +4,18 @@
 import paibox as pb
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
-from paibox.components.synapses.conv_utils import _single, _pair
+from paibox.components.synapses.conv_utils import _pair, _single
 from paibox.network import DynSysGroup
 from paibox.simulator.utils import _conv2d_faster_fp32
 from paibox.utils import as_shape, shape2num, typical_round
 
 from .utils import (
+    avg_pooling,
     avgpool1d_golden,
     avgpool2d_golden,
+    max_pooling,
     maxpool1d_golden,
     maxpool2d_golden,
-    avg_pooling,
-    max_pooling,
 )
 
 

From 3cbf212bb66bbf56c9e83f5b409c183366a23888 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 26 Aug 2024 19:45:10 +0800
Subject: [PATCH 045/187] =?UTF-8?q?=E2=9C=85=20skip=20some=20unready=20tes?=
 =?UTF-8?q?ts?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 8f744409..1cc25221 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -248,6 +248,7 @@ def test_BitwiseXOR_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.skip(reason="'rt_mode_kwds' is not implemented in DelayChain.")
     def test_DelayChain(self):
         from tests.shared_networks import FunctionalModule_1to1_Net
 
@@ -279,6 +280,7 @@ def test_DelayChain(self):
 
         _assert_build_fmodule(net1, 3 + 1 + 2, 3 + 2 * net1.func_node.chain_level + 2)
 
+    @pytest.mark.skip(reason="'rt_mode_kwds' is not implemented in DelayChain.")
     def test_DelayChain_mapping(self, ensure_dump_dir):
         from tests.shared_networks import FunctionalModule_1to1_Net
 
@@ -807,6 +809,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
+    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
         "shape, kernel, stride, padding",
         [
@@ -861,6 +864,7 @@ def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
         # print(expected)
         # print(sim1.data[probe_conv])
 
+    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
         "shape, kernel, stride, padding, out_feature, weight",
         [
@@ -939,6 +943,7 @@ def test_Conv2dSemiMap_Net(
         # expected = np.clip(expected, 0, 7)
         assert np.array_equal(expected, sim2.data[probe_linear][15])
 
+    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
         "shape, kernel_size, stride, weight, pool_type",
         [
@@ -1013,6 +1018,7 @@ def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
         # print(sim1.data[probe_pool])
         # print(sim1.data[probe_linear])
 
+    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
         "shape, weight1",
         [

From de01afdfb1a28df780c6850763de3f4c72866328 Mon Sep 17 00:00:00 2001
From: birdswimming <birdswimming3.14@gmail.com>
Date: Fri, 13 Sep 2024 15:53:17 +0800
Subject: [PATCH 046/187] fix bug in convert routing group

---
 paibox/backend/graphs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index d6db0e1b..640052cf 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -609,10 +609,11 @@ def convert2routing_groups(
         # If out-degree > 1, group successor core blocks according to their routing id.
         if degrees_of_cb[cb].out_degree > 1:
             succ_cbs = succ_dg_of_cb[cb]
-            seen_cb.update(succ_cbs)
 
             succ_cb_gid_dict.clear()
             for succ_cb in succ_cbs:
+                if succ_cb in seen_cb:
+                    continue
                 if succ_cb._routing_id in succ_cb_gid_dict:
                     succ_cb_gid_dict[succ_cb._routing_id].append(succ_cb)
                 else:
@@ -621,6 +622,8 @@ def convert2routing_groups(
             for v in succ_cb_gid_dict.values():
                 routing_groups.append(RoutingGroup(*v))
 
+            seen_cb.update(succ_cbs)
+
     routing_groups_succ: dict[RoutingGroup, list[RoutingGroup]] = defaultdict(list)
 
     for rg in routing_groups:

From 320b60e35726fd8ee1fcfbc00aafc7edf75d6575 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 27 Aug 2024 16:29:18 +0800
Subject: [PATCH 047/187] =?UTF-8?q?=F0=9F=97=91=EF=B8=8F=20remove=20type?=
 =?UTF-8?q?=20`DataArrayType`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/neurons.py      |  6 +++---
 paibox/components/synapses/base.py       |  6 +++---
 paibox/components/synapses/synapses.py   |  4 ++--
 paibox/components/synapses/transforms.py | 10 +++++-----
 paibox/types.py                          |  3 ---
 5 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 48619307..60dacf8e 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -3,7 +3,7 @@
 import numpy as np
 from paicorelib import LDM, NTM, RM
 
-from paibox.types import DataArrayType, Shape
+from paibox.types import LEAK_V_DTYPE, DataType, Shape
 
 from .base import Neuron
 from .utils import LEAK_V_MAX
@@ -72,7 +72,7 @@ def __init__(
         threshold: int,
         reset_v: Optional[int] = None,
         leak_v: int = 0,
-        bias: Optional[DataArrayType] = None,
+        bias: Optional[DataType] = None,
         neg_threshold: Optional[int] = None,
         *,
         keep_shape: bool = True,
@@ -107,7 +107,7 @@ def __init__(
             _rm = RM.MODE_LINEAR
 
         if isinstance(bias, (list, tuple, np.ndarray)):
-            _bias = np.asarray(bias, dtype=np.int32)
+            _bias = np.asarray(bias, dtype=LEAK_V_DTYPE)
         elif bias is not None:
             _bias = int(bias)
         else:
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index c95260f6..ef69a7c7 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -6,7 +6,7 @@
 
 from paibox.base import NeuDyn, SynSys
 from paibox.exceptions import RegisterError, ShapeError
-from paibox.types import DataArrayType, NeuOutType, SynOutType, WeightType
+from paibox.types import DataType, NeuOutType, SynOutType, WeightType
 
 from ..modules import BuildingModule
 from ..neuron import Neuron
@@ -202,7 +202,7 @@ def __init__(
         self,
         source: Union[NeuDyn, InputProj],
         target: NeuDyn,
-        weights: DataArrayType,
+        weights: DataType,
         conn_type: ConnType,
         name: Optional[str] = None,
     ) -> None:
@@ -477,7 +477,7 @@ def __init__(
         self,
         source: Union[NeuDyn, InputProj],
         dest: Neuron,
-        weights: DataArrayType = 1,
+        weights: DataType = 1,
         name: Optional[str] = None,
     ) -> None:
         super().__init__(source, dest, name)
diff --git a/paibox/components/synapses/synapses.py b/paibox/components/synapses/synapses.py
index ec301485..20e9b63d 100644
--- a/paibox/components/synapses/synapses.py
+++ b/paibox/components/synapses/synapses.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 from paibox.base import NeuDyn
-from paibox.types import DataArrayType
+from paibox.types import DataType
 
 from ..neuron import Neuron
 from ..projection import InputProj
@@ -33,7 +33,7 @@ def __init__(
         self,
         source: Union[NeuDyn, InputProj],
         dest: NeuDyn,
-        weights: DataArrayType = 1,
+        weights: DataType = 1,
         *,
         conn_type: ConnType = ConnType.All2All,
         name: Optional[str] = None,
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 338b3ac5..5296736b 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -9,7 +9,7 @@
 from paibox.types import (
     VOLTAGE_DTYPE,
     WEIGHT_DTYPE,
-    DataArrayType,
+    DataType,
     IntScalarType,
     NeuOutType,
     SynOutType,
@@ -70,7 +70,7 @@ class ConnType(Enum):
     """All-to-all connection."""
 
 
-def _set_coarse_dtype(raw_w: DataArrayType) -> WeightType:
+def _set_coarse_dtype(raw_w: DataType) -> WeightType:
     """Convert raw weights to `np.ndarray` coarsely (without optimization).
 
     Description:
@@ -130,7 +130,7 @@ def _get_weight_width_inner(weight: WeightType, enable_wp_opt: bool) -> WW:
 
 
 class Transform:
-    def __init__(self, weights: DataArrayType) -> None:
+    def __init__(self, weights: DataType) -> None:
         self.weights = _set_coarse_dtype(weights)
         """The actual weights in synapses. Stored in np.int8 format."""
 
@@ -154,7 +154,7 @@ def connectivity(self) -> WeightType:
 
 
 class OneToOne(Transform):
-    def __init__(self, num: int, weights: DataArrayType) -> None:
+    def __init__(self, num: int, weights: DataType) -> None:
         """
         Arguments:
             - num: number of neurons.
@@ -206,7 +206,7 @@ def __init__(self, num: int, scaling_factor: IntScalarType = 1) -> None:
 
 
 class AllToAll(Transform):
-    def __init__(self, conn_size: Size2Type, weights: DataArrayType) -> None:
+    def __init__(self, conn_size: Size2Type, weights: DataType) -> None:
         """
         Arguments:
             - conn_size: size of connections.
diff --git a/paibox/types.py b/paibox/types.py
index f5a90365..f963f13e 100644
--- a/paibox/types.py
+++ b/paibox/types.py
@@ -14,9 +14,6 @@
 Scalar = TypeVar("Scalar", int, float, np.generic)
 IntScalarType = TypeVar("IntScalarType", int, np.bool_, np.integer)
 DataType = TypeVar("DataType", int, np.bool_, np.integer, np.ndarray)
-DataArrayType = TypeVar(
-    "DataArrayType", int, np.bool_, np.integer, list[int], tuple[int, ...], np.ndarray
-)
 
 LEAK_V_DTYPE = np.int32
 SPIKE_DTYPE = np.bool_

From 3be0f6cc0f36a947e223e39334d0a43ea4f67931 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 29 Aug 2024 20:14:49 +0800
Subject: [PATCH 048/187] =?UTF-8?q?=E2=9C=A8=20add=20`ANNNeuron`=20&=20dep?=
 =?UTF-8?q?recate=20`SpikingRelu`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py                  |  1 +
 paibox/components/_modules.py       |  4 +-
 paibox/components/functional.py     | 10 ++--
 paibox/components/neuron/neurons.py | 71 +++++++++++++++++++++++++++--
 tests/backend/conftest.py           |  2 +-
 tests/shared_networks.py            | 25 ++++++----
 6 files changed, 94 insertions(+), 19 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index d776f414..d132bdfe 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -32,6 +32,7 @@
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
 from .components.neuron.neurons import SpikingRelu as SpikingRelu
 from .components.neuron.neurons import TonicSpiking as TonicSpiking
+from .components.neuron.neurons import BypassNeuron as BypassNeuron
 
 # Input projection
 from .components.projection import InputProj as InputProj
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index f1847096..e998cb80 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -98,7 +98,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
                 **self.rt_mode_kwds,
             )
         else:  # "max"
-            n1_p1d = SpikingRelu(
+            n1_p1d = BypassNeuron(
                 self.shape_out,
                 delay=self.delay_relative,
                 tick_wait_start=self.tick_wait_start,
@@ -268,7 +268,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
                 **self.rt_mode_kwds,
             )
         else:  # "max"
-            n1_p2d = SpikingRelu(
+            n1_p2d = BypassNeuron(
                 self.shape_out,
                 delay=self.delay_relative,
                 tick_wait_start=self.tick_wait_start,
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 584f6aa3..d3ff7c2f 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -231,7 +231,7 @@ def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 | x2
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        n1_or = SpikingRelu(
+        n1_or = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start,
@@ -293,7 +293,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         # If neuron_a is of shape (h1, w1) = N, and neuron_b is of shape (h2, w2) = N.
         # The output shape of the module is (N,) or (h1, w1)(if h1 == h2).
         # The shape of n1 is (2N,) or (2, h1, w1).
-        n1_aux = SpikingRelu(
+        n1_aux = BypassNeuron(
             (2,) + self.shape_out,
             delay=1,
             tick_wait_start=self.tick_wait_start,
@@ -322,7 +322,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         )
 
         # The shape of n2 is (N,) or (h1, w1).
-        n2_xor = SpikingRelu(
+        n2_xor = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=n1_aux.tick_wait_start + 1,
@@ -855,7 +855,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return _x1.T
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        n1_t2d = SpikingRelu(
+        n1_t2d = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start,
@@ -920,7 +920,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return _x1.transpose(self.axes)
 
     def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        n1_t3d = SpikingRelu(
+        n1_t3d = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start,
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 60dacf8e..4824bd86 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -3,12 +3,29 @@
 import numpy as np
 from paicorelib import LDM, NTM, RM
 
+from paibox.exceptions import PAIBoxDeprecationWarning
 from paibox.types import LEAK_V_DTYPE, DataType, Shape
 
 from .base import Neuron
 from .utils import LEAK_V_MAX
 
-__all__ = ["IF", "LIF", "TonicSpiking", "PhasicSpiking", "SpikingRelu", "Always1Neuron"]
+import sys
+
+if sys.version_info >= (3, 13):
+    from typing import deprecated
+else:
+    from typing_extensions import deprecated
+
+__all__ = [
+    "IF",
+    "LIF",
+    "TonicSpiking",
+    "PhasicSpiking",
+    "BypassNeuron",
+    "Always1Neuron",
+    "ANNBypassNeuron",
+    "ANNNeuron",
+]
 
 
 class IF(Neuron):
@@ -224,7 +241,7 @@ def __init__(
         )
 
 
-class SpikingRelu(Neuron):
+class BypassNeuron(Neuron):
     def __init__(
         self,
         shape: Shape,
@@ -233,13 +250,61 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """Spiking relu neuron. Act exactly the way you think.
+        """Bypass neuron. Output is equal to input.
 
         Args:
             - shape: shape of neurons.
             - keep_shape: whether to maintain shape in the simulation. Default is `True`.
             - name: name of the neuron. Optional.
+
+        NOTE: positive threshold = 1, negative threshold = 0, reset_v = 0, and leak_v = 0.
+
         """
         super().__init__(
             shape, neg_threshold=0, keep_shape=keep_shape, name=name, **kwargs
         )
+
+
+@deprecated(
+    "'SpikingRelu' is deprecated in version 1.2.0 and   \
+        will be removed in version 1.3.0. Use 'BypassNeuron' instead.",
+    category=PAIBoxDeprecationWarning,
+)
+class SpikingRelu(BypassNeuron):
+    pass
+
+
+class ANNNeuron(LIF):
+    def __init__(
+        self,
+        shape: Shape,
+        bias: Optional[DataType] = None,
+        bit_trunc: int = 8,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """General neuron used in ANN mode. Positive threshold = 1, negative threshold = 0."""
+        kwargs["bit_truncation"] = bit_trunc
+        kwargs.setdefault("input_width", 8)
+        kwargs.setdefault("spike_width", 8)
+        kwargs.setdefault("snn_en", False)
+
+        super().__init__(
+            shape, 1, bias=bias, keep_shape=keep_shape, name=name, **kwargs
+        )
+
+
+class ANNBypassNeuron(ANNNeuron):
+    def __init__(
+        self,
+        shape: Shape,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(
+            shape, bias=None, bit_trunc=8, keep_shape=keep_shape, name=name, **kwargs
+        )
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index 21315b3a..48387125 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -561,7 +561,7 @@ def __init__(self, scale: int):
 
             self.n.append(pb.IF((n,), thres, resetv))
 
-        self.n_out = pb.SpikingRelu(1000)
+        self.n_out = pb.BypassNeuron(1000)
 
         self.s = NodeList()
 
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 6986efca..401f8341 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -108,7 +108,7 @@ def __init__(self, op: Literal["and", "or", "xor", "add", "sub"]):
 
         self.func_node = _2to1_op[op](self.n1, self.n2, delay=1, tick_wait_start=2)
 
-        self.n3 = pb.SpikingRelu(
+        self.n3 = pb.BypassNeuron(
             (10,),
             delay=1,
             tick_wait_start=self.func_node.tick_wait_start
@@ -137,9 +137,18 @@ def __init__(self, op: Literal["not", "delay"]):
         if op == "not":
             self.func_node = pb.BitwiseNOT(self.n1, tick_wait_start=2)
         elif op == "delay":
-            self.func_node = pb.DelayChain(self.n1, chain_level=5, tick_wait_start=2)
-
-        self.n2 = pb.SpikingRelu(
+            if hasattr(pb, "DelayChain"):
+                self.func_node = pb.DelayChain(  # type: ignore
+                    self.n1, chain_level=5, tick_wait_start=2
+                )
+            else:
+                from paibox.components._modules import _DelayChainSNN
+
+                self.func_node = _DelayChainSNN(
+                    self.n1, chain_level=5, tick_wait_start=2
+                )
+
+        self.n2 = pb.BypassNeuron(
             (10,),
             delay=1,
             tick_wait_start=self.func_node.tick_wait_start
@@ -171,7 +180,7 @@ def __init__(
     ):
         super().__init__()
         self.inp1 = pb.InputProj(input=_out_bypass1, shape_out=fm_shape)
-        self.n1 = pb.SpikingRelu(fm_shape, tick_wait_start=1)
+        self.n1 = pb.BypassNeuron(fm_shape, tick_wait_start=1)
         self.s1 = pb.FullConn(self.inp1, self.n1, conn_type=pb.SynConnType.One2One)
 
         self.pool = _pool_op[(pool_ndim, pool_type)](
@@ -184,7 +193,7 @@ def __init__(
             tick_wait_start=2,
         )
 
-        self.n2 = pb.SpikingRelu(self.pool.shape_out, delay=1, tick_wait_start=3)
+        self.n2 = pb.BypassNeuron(self.pool.shape_out, delay=1, tick_wait_start=3)
         self.s3 = pb.FullConn(self.pool, self.n2, conn_type=pb.SynConnType.One2One)
 
         self.probe1 = pb.Probe(self.n1, "spike")
@@ -218,7 +227,7 @@ def __init__(self, shape):
         self.n1 = pb.IF(shape, 1, 0, tick_wait_start=1)
         self.s1 = pb.FullConn(self.inp1, self.n1, conn_type=pb.SynConnType.One2One)
         self.t2d = pb.Transpose2d(self.n1, tick_wait_start=2)
-        self.n2 = pb.SpikingRelu(
+        self.n2 = pb.BypassNeuron(
             shape, tick_wait_start=self.t2d.tick_wait_start + self.t2d.external_delay
         )
         self.s2 = pb.FullConn(self.t2d, self.n2, conn_type=pb.SynConnType.One2One)
@@ -235,7 +244,7 @@ def __init__(self, shape, axes):
         self.n1 = pb.IF(shape, 1, 0, tick_wait_start=1)
         self.s1 = pb.FullConn(self.inp1, self.n1, conn_type=pb.SynConnType.One2One)
         self.t3d = pb.Transpose3d(self.n1, axes=axes, tick_wait_start=2)
-        self.n2 = pb.SpikingRelu(
+        self.n2 = pb.BypassNeuron(
             shape, tick_wait_start=self.t3d.tick_wait_start + self.t3d.external_delay
         )
         self.s2 = pb.FullConn(self.t3d, self.n2, conn_type=pb.SynConnType.One2One)

From de50ddcd7a498a18a49ef1bd07292dabb7452730 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 29 Aug 2024 20:15:09 +0800
Subject: [PATCH 049/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20ANNNeur?=
 =?UTF-8?q?on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/neuron/test_neurons.py | 58 ++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/tests/components/neuron/test_neurons.py b/tests/components/neuron/test_neurons.py
index 1fda7974..bb4d72dc 100644
--- a/tests/components/neuron/test_neurons.py
+++ b/tests/components/neuron/test_neurons.py
@@ -536,8 +536,8 @@ def test_PhasicSpiking(self):
             assert np.array_equal(n1.spike, expected_spike[i])
             assert np.array_equal(n1.voltage, expected_vol[i])
 
-    def test_SpikingRelu(self):
-        n1 = pb.SpikingRelu(1)
+    def test_BypassNeuron(self):
+        n1 = pb.BypassNeuron(1, **_snn_kwds)
 
         incoming_v = np.random.randint(0, 2, size=(20, 1), dtype=np.bool_)
 
@@ -624,6 +624,60 @@ def test_AvgPool_Neuron(self, n_window):
             assert np.array_equal(n1.spike[0], expected)
 
 
+from paibox.components.neuron.neurons import ANNNeuron
+
+
+class TestANNNeuron:
+    def test_ANNNeuron(self):
+        n1 = ANNNeuron(1, 0, 8)
+
+        incoming_v = np.random.randint(-128, 128, size=(20, 1), dtype=np.int32)
+
+        for i in range(incoming_v.size):
+            pb.FRONTEND_ENV["t"] += 1
+            n1.update(incoming_v[i])
+
+            assert np.array_equal(
+                n1.spike, np.asarray([0]) if incoming_v[i] < 0 else incoming_v[i]
+            )
+
+        assert 1
+
+    @pytest.mark.parametrize(
+        "bit_trunc, expected_v",
+        [
+            (8, np.array([10, 255, 255, 90, 110 & 255, 255, 0, 0], dtype=np.uint8)),
+            (
+                9,
+                np.array(
+                    [
+                        (10 >> 1) & 255,
+                        (390 >> 1) & 255,
+                        255,
+                        (90 >> 1) & 255,
+                        (110 >> 1) & 255,
+                        (468 >> 1) & 255,
+                        0,
+                        0,
+                    ],
+                    dtype=np.uint8,
+                ),
+            ),
+        ],
+        ids=["8_bit", "9_bit"],
+    )
+    def test_ANNNeuron_bit_trunc(self, bit_trunc, expected_v):
+        n1 = ANNNeuron(1, -10, bit_trunc)
+
+        incoming_v = np.array([20, 400, 1000, 100, 120, 478, 0, -10], dtype=np.int32)
+
+        for i in range(incoming_v.size):
+            pb.FRONTEND_ENV["t"] += 1
+            n1.update(incoming_v[i])
+
+            assert np.array_equal(n1.spike[0], expected_v[i])
+
+
 class TestNeuronAllModes:
     """Test neuron with specified 'spike width' & 'snn_en'.
 

From f0c73be08ff762da9f2b2195e1871265af0d0894 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 29 Aug 2024 20:16:22 +0800
Subject: [PATCH 050/187] =?UTF-8?q?=F0=9F=A7=B1=20write=20bit=20truncation?=
 =?UTF-8?q?=20processing=20as=20a=20static=20method?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py        | 26 ++++++++++++----------
 tests/components/neuron/test_neurons.py | 29 +++++++------------------
 2 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index ad47b84f..f87e41a6 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -320,19 +320,10 @@ def _bit_truncate(self, vj: VoltageType) -> NeuOutType:
 
             If the MSB of voltage is greater than the truncation bit, return 8'd255.
         """
-
-        def _truncate() -> VoltageType:
-            if (vj >> self.bit_truncation > 0).all():  # Saturate truncation
-                return np.full_like(vj, _mask(8))
-            elif self.bit_truncation == 0:
-                return self._vjt0
-            elif self.bit_truncation < 8:
-                return (vj << (8 - self.bit_truncation)) & _mask(8)
-            else:
-                return (vj >> (self.bit_truncation - 8)) & _mask(8)
-
         v_truncated = np.where(
-            self.thres_mode == TM.EXCEED_POSITIVE, _truncate(), self._vjt0
+            self.thres_mode == TM.EXCEED_POSITIVE,
+            self._truncate(vj, self.bit_truncation),
+            self._vjt0,
         )
         return v_truncated.astype(NEUOUT_U8_DTYPE)
 
@@ -381,6 +372,17 @@ def update(
     def init_param(self, param: Any) -> np.ndarray:
         return np.full((self._n_neuron,), param)
 
+    @staticmethod
+    def _truncate(v: VoltageType, bit_trunc: int) -> VoltageType:
+        if np.any((v >> bit_trunc) > 0):  # Saturate truncation
+            return np.full_like(v, _mask(8))
+        elif bit_trunc == 0:
+            return np.full_like(v, 0)
+        elif bit_trunc < 8:
+            return (v << (8 - bit_trunc)) & _mask(8)
+        else:
+            return (v >> (bit_trunc - 8)) & _mask(8)
+
     @property
     def _vjt0(self) -> VoltageType:
         return self.init_param(0).astype(VOLTAGE_DTYPE)
diff --git a/tests/components/neuron/test_neurons.py b/tests/components/neuron/test_neurons.py
index bb4d72dc..901e3732 100644
--- a/tests/components/neuron/test_neurons.py
+++ b/tests/components/neuron/test_neurons.py
@@ -9,6 +9,7 @@
 
 import paibox as pb
 from paibox.components import Neuron
+from paibox.components.neuron.base import MetaNeuron
 from paibox.components.neuron.utils import VJT_MAX, VJT_MIN
 from paibox.exceptions import ShapeError
 from paibox.types import NEUOUT_U8_DTYPE, VoltageType
@@ -686,23 +687,15 @@ class TestNeuronAllModes:
 
     @staticmethod
     def _ann_vjt_func(vj: VoltageType, neuron: Neuron) -> NDArray[NEUOUT_U8_DTYPE]:
-        def _bit_tuncate(bit_tunc: int, vj: VoltageType):
-            if bit_tunc == 0:
-                return np.zeros_like(vj)
-            elif vj >> bit_tunc > 0:  # Saturate truncation
-                return np.full_like(vj, 255)
-            elif bit_tunc < 8:
-                return (vj << (8 - bit_tunc)) & 255
-            else:
-                return (vj >> (bit_tunc - 8)) & 255
-
         return np.where(
             vj >= neuron.pos_threshold,
-            _bit_tuncate(neuron.bit_truncation, vj),
-            neuron._vjt0,
+            MetaNeuron._truncate(vj, neuron.bit_truncation),
+            0,
         ).astype(NEUOUT_U8_DTYPE)
 
-    @pytest.mark.parametrize("reg_kwds", [_reg010_kwds, _reg110_kwds])
+    @pytest.mark.parametrize(
+        "reg_kwds", [_reg010_kwds, _reg110_kwds], ids=["010", "ann"]
+    )
     def test_IF_ss10(self, reg_kwds):
         n1 = pb.IF(1, 0, 0, bit_truncation=8, **reg_kwds)
 
@@ -713,10 +706,7 @@ def test_IF_ss10(self, reg_kwds):
         for i in range(incoming_v.size):
             pb.FRONTEND_ENV["t"] += 1
             n1.update(incoming_v[i])
-            v_bt = self._ann_vjt_func(
-                np.asarray(incoming_v[i], dtype=np.int32),
-                n1,
-            )
+            v_bt = self._ann_vjt_func(np.atleast_1d(incoming_v[i]), n1)
 
             assert np.array_equal(n1.spike, v_bt)
 
@@ -734,10 +724,7 @@ def test_LIF_ss11(self):
             pre_vjt += incoming_v[i]
             spike = pre_vjt >= pos_thres
 
-            v_bt = self._ann_vjt_func(
-                np.asarray(pre_vjt, dtype=np.int32),
-                n1,
-            )
+            v_bt = self._ann_vjt_func(np.atleast_1d(pre_vjt), n1)
 
             if spike:
                 pre_vjt -= pos_thres

From 45b40d107e447fbc01e6076665217c9c90bbf661 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 30 Aug 2024 23:15:16 +0800
Subject: [PATCH 051/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix:=20fix=20bit=20tr?=
 =?UTF-8?q?uncation=20of=20neuron?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index f87e41a6..e68167a2 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -374,14 +374,16 @@ def init_param(self, param: Any) -> np.ndarray:
 
     @staticmethod
     def _truncate(v: VoltageType, bit_trunc: int) -> VoltageType:
-        if np.any((v >> bit_trunc) > 0):  # Saturate truncation
-            return np.full_like(v, _mask(8))
-        elif bit_trunc == 0:
-            return np.full_like(v, 0)
-        elif bit_trunc < 8:
-            return (v << (8 - bit_trunc)) & _mask(8)
-        else:
-            return (v >> (bit_trunc - 8)) & _mask(8)
+        def _truncate_below_u8(vt):
+            if bit_trunc == 0:
+                return 0
+            elif bit_trunc < 8:
+                return (vt << (8 - bit_trunc)) & _mask(8)
+            else:
+                return (vt >> (bit_trunc - 8)) & _mask(8)
+
+        # Saturate truncation
+        return np.where((v >> bit_trunc) > 0, _mask(8), _truncate_below_u8(v))
 
     @property
     def _vjt0(self) -> VoltageType:

From 22f524e92fc1d2023196e807c37b41fcdbcff061 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sat, 31 Aug 2024 10:57:12 +0800
Subject: [PATCH 052/187] =?UTF-8?q?=F0=9F=94=A8=20set=20bias=20to=20`DataT?=
 =?UTF-8?q?ype`=20&=20default=20value=20to=200?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/neurons.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 4824bd86..5ee91e8e 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -89,7 +89,7 @@ def __init__(
         threshold: int,
         reset_v: Optional[int] = None,
         leak_v: int = 0,
-        bias: Optional[DataType] = None,
+        bias: DataType = 0,
         neg_threshold: Optional[int] = None,
         *,
         keep_shape: bool = True,
@@ -123,12 +123,10 @@ def __init__(
             _reset_v = 0
             _rm = RM.MODE_LINEAR
 
-        if isinstance(bias, (list, tuple, np.ndarray)):
-            _bias = np.asarray(bias, dtype=LEAK_V_DTYPE)
-        elif bias is not None:
-            _bias = int(bias)
+        if isinstance(bias, np.ndarray):
+            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
         else:
-            _bias = 0
+            _bias = int(bias)
 
         # Support passing in bias & leak_v at the same time
         _leak_v = leak_v + _bias
@@ -278,7 +276,7 @@ class ANNNeuron(LIF):
     def __init__(
         self,
         shape: Shape,
-        bias: Optional[DataType] = None,
+        bias: DataType = 0,
         bit_trunc: int = 8,
         *,
         keep_shape: bool = True,
@@ -306,5 +304,5 @@ def __init__(
         **kwargs,
     ) -> None:
         super().__init__(
-            shape, bias=None, bit_trunc=8, keep_shape=keep_shape, name=name, **kwargs
+            shape, bias=0, bit_trunc=8, keep_shape=keep_shape, name=name, **kwargs
         )

From 6af4a4af38e8d98aaff7146c617bc2671999bb76 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sat, 31 Aug 2024 11:26:46 +0800
Subject: [PATCH 053/187] =?UTF-8?q?=F0=9F=9A=9A=20rename=20`Conv2dSemiFold?=
 =?UTF-8?q?edForward`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/synapses/__init__.py   |  2 +-
 paibox/components/synapses/base.py       | 13 +++++++---
 paibox/components/synapses/conv_utils.py |  2 +-
 paibox/components/synapses/transforms.py | 31 ++++++++----------------
 4 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index 61177dfc..0aefda77 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,2 +1,2 @@
-from .base import Conv2dHalfRollSyn, FullConnectedSyn, FullConnSyn, MaxPool2dSemiMapSyn
+from .base import Conv2dSemiFoldedSyn, FullConnectedSyn, FullConnSyn, MaxPool2dSemiMapSyn
 from .transforms import ConnType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index ef69a7c7..3ba2d72a 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -18,7 +18,7 @@
     ConnType,
     Conv1dForward,
     Conv2dForward,
-    Conv2dHalfForward,
+    Conv2dSemiFoldedForward,
     ConvTranspose1dForward,
     ConvTranspose2dForward,
     Identity,
@@ -328,7 +328,8 @@ def __init__(
         )
 
 
-class Conv2dHalfRollSyn(FullConnectedSyn):
+class Conv2dSemiFoldedSyn(FullConnectedSyn):
+    _spatial_ndim: ClassVar[int] = 1
 
     def __init__(
         self,
@@ -359,7 +360,13 @@ def __init__(
         if in_ch != in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
-        self.comm = Conv2dHalfForward(
+        if (_output_size := out_channels * out_h) != dest.num_in:
+            raise ShapeError(
+                f"output size mismatch: {_output_size} ({out_channels}*{out_h}) "
+                f"!= {dest.num_in}."
+            )
+
+        self.comm = Conv2dSemiFoldedForward(
             (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding
         )
 
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 6cadac69..9c96cb58 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -183,7 +183,7 @@ def _conv2d_unroll(
     return w_unrolled
 
 
-def _conv2d_halfroll(
+def _conv2d_semifolded_unroll(
     in_shape: Size2Type,
     out_shape: Size2Type,
     kernel: WeightType,
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 5296736b..71d76273 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -22,8 +22,8 @@
     _conv1d_faster,
     _conv1d_unroll,
     _conv2d_faster,
-    _conv2d_halfroll,
     _conv2d_unroll,
+    _conv2d_semifolded_unroll,
     _convtranspose1d_faster,
     _convtranspose1d_unroll,
     _convtranspose2d_faster,
@@ -39,7 +39,9 @@
     "AllToAll",
     "Identity",
     "MaskedLinear",
+    "Conv1dForward",
     "Conv2dForward",
+    "Conv2dSemiFoldedForward",
     "ConvTranspose1dForward",
     "ConvTranspose2dForward",
 ]
@@ -395,32 +397,19 @@ def connectivity(self):
         )
 
 
-class Conv2dHalfForward(Transform):
-    def __init__(
-        self,
-        in_shape: Size2Type,
-        out_shape: Size2Type,
-        kernel: np.ndarray,
-        stride: Size2Type,
-        padding: Size2Type,
-        # fm_order: _Order3d,
-    ) -> None:
-        self.in_shape = in_shape
-        self.out_shape = out_shape
-        self.stride = stride
-        self.padding = padding
-        self.kernel = kernel
-        # self.fm_order = fm_order
-
-        super().__init__(kernel)
+class Conv2dSemiFoldedForward(_ConvNdForward):
+    in_shape: Size2Type
+    out_shape: Size2Type
+    stride: Size2Type
+    padding: Size2Type
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         return x @ self.connectivity
 
     @property
     def connectivity(self):
-        return _conv2d_halfroll(
-            self.in_shape, self.out_shape, self.kernel, self.stride, self.padding
+        return _conv2d_semifolded_unroll(
+            self.in_shape, self.out_shape, self.weights, self.stride, self.padding
         )
 
 

From e10e5fcb7cde8691f033e28abebbe9d4e789dcc0 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sat, 31 Aug 2024 14:17:20 +0800
Subject: [PATCH 054/187] =?UTF-8?q?=F0=9F=9A=9A=20move=20conv=20golden=20r?=
 =?UTF-8?q?ef=20functions=20to=20utils?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/synapses/test_transforms.py | 84 +-------------------
 tests/components/utils.py                    | 78 ++++++++++++++++++
 2 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index d94e21b9..e413322a 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -6,6 +6,8 @@
 from paibox.types import WEIGHT_DTYPE
 from paibox.utils import shape2num
 
+from ..utils import _conv1d_golden, _conv2d_golden
+
 
 class TestTransforms:
     @pytest.mark.parametrize(
@@ -238,41 +240,6 @@ def test_MaskedLinear(
         assert np.array_equal(y2, expected.ravel())
         assert f.connectivity.shape == (x.size, y.size)
 
-    @staticmethod
-    def _conv1d_golden(
-        x: np.ndarray,
-        out_shape: tuple[int],
-        kernel: np.ndarray,
-        stride: tuple[int],
-        padding: tuple[int],
-    ):
-        cout, cin, kl = kernel.shape
-        xcin, il = x.shape
-
-        assert cin == xcin
-
-        ol = (il - kl + 2 * padding[0]) // stride[0] + 1
-
-        assert ol == out_shape[0]
-
-        out = np.zeros((cout,) + out_shape, dtype=np.int64)
-
-        x_padded = np.pad(x, ((0, 0), (padding[0], padding[0])), mode="constant")
-        conv_result = np.zeros((ol,), dtype=np.int64)
-
-        for o in range(cout):
-            for i in range(cin):
-                conv_result.fill(0)
-                for l in range(ol):
-                    window = x_padded[i, l * stride[0] : l * stride[0] + kl].astype(
-                        np.int64
-                    )
-                    conv_result[l] = np.sum(window * kernel[o, i, :])
-
-                out[o] += conv_result
-
-        return out
-
     @pytest.mark.parametrize(
         "xdtype, in_shape, in_channels, out_channels, kernel_size, stride, padding, kdtype",
         [
@@ -346,7 +313,7 @@ def test_Conv1dForward(
         # The result of matmul using the unrolled matrix
         y2 = xf @ f.connectivity.astype(np.int32)
 
-        expected = self._conv1d_golden(x, out_shape, kernel, stride, padding)
+        expected = _conv1d_golden(x, out_shape, kernel, stride, padding)
 
         assert np.array_equal(y1, expected)
         assert np.array_equal(y2, expected.ravel())
@@ -355,49 +322,6 @@ def test_Conv1dForward(
             shape2num((kernel.shape[0],) + out_shape),
         )
 
-    @staticmethod
-    def _conv2d_golden(
-        x: np.ndarray,
-        out_shape: tuple[int, int],
-        kernel: np.ndarray,
-        stride: tuple[int, int],
-        padding: tuple[int, int],
-    ):
-        cout, cin, kh, kw = kernel.shape
-        xcin, ih, iw = x.shape
-
-        assert cin == xcin
-
-        oh = (ih - kh + 2 * padding[0]) // stride[0] + 1
-        ow = (iw - kw + 2 * padding[1]) // stride[1] + 1
-
-        assert oh, ow == out_shape
-
-        out = np.zeros((cout,) + out_shape, dtype=np.int64)
-
-        x_padded = np.pad(
-            x,
-            ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-            mode="constant",
-        )
-        conv_result = np.zeros((oh, ow), dtype=np.int64)
-
-        for o in range(cout):
-            for i in range(cin):
-                conv_result.fill(0)
-                for h in range(oh):
-                    for w in range(ow):
-                        window = x_padded[
-                            i,
-                            h * stride[0] : h * stride[0] + kh,
-                            w * stride[1] : w * stride[1] + kw,
-                        ].astype(np.int64)
-                        conv_result[h, w] = np.sum(window * kernel[o, i, :, :])
-
-                out[o] += conv_result
-
-        return out
-
     @pytest.mark.parametrize(
         "xdtype, in_shape, in_channels, out_channels, kernel_size, stride, padding, kdtype",
         [
@@ -469,7 +393,7 @@ def test_Conv2dForward(
         # The result of matmul using the unrolled matrix
         y2 = xf @ f.connectivity.astype(np.int32)
 
-        expected = self._conv2d_golden(x, out_shape, kernel, stride, padding)
+        expected = _conv2d_golden(x, out_shape, kernel, stride, padding)
 
         assert np.array_equal(y1, expected)
         assert np.array_equal(y2, expected.ravel())
diff --git a/tests/components/utils.py b/tests/components/utils.py
index f84c0cab..3e399444 100644
--- a/tests/components/utils.py
+++ b/tests/components/utils.py
@@ -5,6 +5,84 @@
 from paibox.types import WEIGHT_DTYPE, NeuOutType, SpikeType
 
 
+def _conv1d_golden(
+    x: np.ndarray,
+    out_shape: tuple[int],
+    kernel: np.ndarray,
+    stride: tuple[int],
+    padding: tuple[int],
+):  # Golden reference: naive loop-based 1D convolution accumulated in int64.
+    cout, cin, kl = kernel.shape  # kernel indexed below as [out ch, in ch, tap]
+    xcin, il = x.shape  # input indexed below as [in ch, position]
+
+    assert cin == xcin
+
+    ol = (il - kl + 2 * padding[0]) // stride[0] + 1  # expected output length
+
+    assert ol == out_shape[0]
+
+    out = np.zeros((cout,) + out_shape, dtype=np.int64)
+
+    x_padded = np.pad(x, ((0, 0), (padding[0], padding[0])), mode="constant")
+    conv_result = np.zeros((ol,), dtype=np.int64)  # scratch buffer, reused per (o, i)
+
+    for o in range(cout):
+        for i in range(cin):
+            conv_result.fill(0)
+            for l in range(ol):
+                window = x_padded[i, l * stride[0] : l * stride[0] + kl].astype(
+                    np.int64
+                )
+                conv_result[l] = np.sum(window * kernel[o, i, :])
+
+            out[o] += conv_result  # sum the contributions of all input channels
+
+    return out
+
+
+def _conv2d_golden(
+    x: np.ndarray,
+    out_shape: tuple[int, int],
+    kernel: np.ndarray,
+    stride: tuple[int, int],
+    padding: tuple[int, int],
+):  # Golden reference: naive loop-based 2D convolution accumulated in int64.
+    cout, cin, kh, kw = kernel.shape  # kernel indexed below as [out ch, in ch, h, w]
+    xcin, ih, iw = x.shape  # input indexed below as [in ch, h, w]
+
+    assert cin == xcin
+
+    oh = (ih - kh + 2 * padding[0]) // stride[0] + 1  # expected output height
+    ow = (iw - kw + 2 * padding[1]) // stride[1] + 1  # expected output width
+
+    assert (oh, ow) == out_shape  # compare as a tuple; `assert oh, msg` checked nothing
+
+    out = np.zeros((cout,) + out_shape, dtype=np.int64)
+
+    x_padded = np.pad(
+        x,
+        ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
+        mode="constant",
+    )
+    conv_result = np.zeros((oh, ow), dtype=np.int64)  # scratch buffer, reused per (o, i)
+
+    for o in range(cout):
+        for i in range(cin):
+            conv_result.fill(0)
+            for h in range(oh):
+                for w in range(ow):
+                    window = x_padded[
+                        i,
+                        h * stride[0] : h * stride[0] + kh,
+                        w * stride[1] : w * stride[1] + kw,
+                    ].astype(np.int64)
+                    conv_result[h, w] = np.sum(window * kernel[o, i, :, :])
+
+            out[o] += conv_result  # sum the contributions of all input channels
+
+    return out
+
+
 def maxpool1d_golden(
     x: SpikeType,
     kernel_size: tuple[int],

From e9ec7115e3f1a21b42852d9d4a4b80dd7d93f020 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:08:06 +0800
Subject: [PATCH 055/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20delay?=
 =?UTF-8?q?=20chain=20as=20an=20internal=20module?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py              |   4 +-
 paibox/components/_modules.py   | 126 +++++++++++++++++++++++++++++++-
 paibox/components/functional.py |  96 +-----------------------
 paibox/types.py                 |   3 +-
 4 files changed, 129 insertions(+), 100 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index d132bdfe..261272e8 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -9,9 +9,7 @@
 from .components.functional import BitwiseNOT as BitwiseNOT
 from .components.functional import BitwiseOR as BitwiseOR
 from .components.functional import BitwiseXOR as BitwiseXOR
-from .components.functional import Conv2dSemiMap as Conv2dSemiMap
-from .components.functional import Delay_FullConn as DelayFullConn
-from .components.functional import DelayChain as DelayChain
+
 from .components.functional import Filter as Filter
 from .components.functional import Linear as Linear
 from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index e998cb80..d30b898c 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,17 +1,32 @@
-from typing import Literal, Optional, Union
+from typing import Literal, Optional, Protocol, Union
 
 import numpy as np
 from paicorelib import TM
 
-from paibox.base import NeuDyn
+from paibox.base import NeuDyn, NodeList
 from paibox.network import DynSysGroup
-from paibox.types import NEUOUT_U8_DTYPE, WEIGHT_DTYPE, NeuOutType, VoltageType
-from paibox.utils import arg_check_non_neg, shape2num, typical_round
+from paibox.types import (
+    LEAK_V_DTYPE,
+    NEUOUT_U8_DTYPE,
+    WEIGHT_DTYPE,
+    DataType,
+    NeuOutType,
+    Shape,
+    VoltageType,
+)
+from paibox.utils import (
+    arg_check_non_neg,
+    arg_check_pos,
+    as_shape,
+    shape2num,
+    typical_round,
+)
 
 from .modules import (
     BuiltComponentType,
     FunctionalModule,
     FunctionalModuleWithV,
+    set_rt_mode_ann,
     set_rt_mode_snn,
 )
 from .neuron import Neuron
@@ -29,13 +44,116 @@
 )
 
 __all__ = [
+    "_DelayChainANN",
+    "_DelayChainSNN",
     "_SpikingPool1d",
     "_SpikingPool1dWithV",
     "_SpikingPool2d",
     "_SpikingPool2dWithV",
+    "_HasSemiFoldedIntf",
+    "_LinearBase",
 ]
 
 
+class _DelayChainBase(FunctionalModule):
+    def __init__(
+        self,
+        neuron: Union[NeuDyn, InputProj],
+        chain_level: int = 1,
+        *,
+        keep_shape: bool = True,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """Delay chain. It will add extra neurons (and identity synapses) as buffer.
+
+        Args:
+            - neuron: the target neuron to be delayed.
+            - chain_level: the level of delay chain.
+
+        NOTE: the inherent delay of the module depends on `chain_level`.
+        """
+        if keep_shape:
+            shape_out = neuron.shape_out
+        else:
+            shape_out = (neuron.num_out,)  # flattened 1-D output shape
+
+        self.chain_level = arg_check_pos(chain_level, "chain level")
+        self.inherent_delay = chain_level - 1
+
+        super().__init__(
+            neuron,
+            shape_out=shape_out,
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        return x1  # the input is passed through unchanged
+
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        n_delaychain = NodeList()
+        s_delaychain = NodeList()
+
+        # Delay chain of length #D.
+        for i in range(self.chain_level - 1):
+            n_delay = BypassNeuron(
+                self.shape_out,
+                tick_wait_start=self.tick_wait_start + i,
+                tick_wait_end=self.tick_wait_end,
+                delay=1,
+                name=f"n{i}_{self.name}",
+                **self.rt_mode_kwds,
+            )
+            n_delaychain.append(n_delay)
+
+        # Output neuron (delay = delay_relative); loop var is unbound if chain_level == 1.
+        n_out = BypassNeuron(
+            self.shape_out,
+            tick_wait_start=self.tick_wait_start + self.chain_level - 1,
+            tick_wait_end=self.tick_wait_end,
+            delay=self.delay_relative,
+            name=f"n{self.chain_level - 1}_{self.name}",
+            **self.rt_mode_kwds,
+        )
+        n_delaychain.append(n_out)  # Must append to the last.
+
+        syn_in = FullConnSyn(
+            self.module_intf.operands[0],
+            n_delaychain[0],
+            1,
+            conn_type=ConnType.One2One,
+            name=f"s0_{self.name}",
+        )
+
+        for i in range(self.chain_level - 1):
+            s_delay = FullConnSyn(
+                n_delaychain[i],
+                n_delaychain[i + 1],
+                1,
+                conn_type=ConnType.One2One,
+                name=f"s{i + 1}_{self.name}",
+            )
+
+            s_delaychain.append(s_delay)
+
+        generated = [*n_delaychain, syn_in, *s_delaychain]  # neurons, input synapse, links
+        self._rebuild_out_intf(network, n_out, *generated, **build_options)
+
+        return generated
+
+
+@set_rt_mode_snn()
+class _DelayChainSNN(_DelayChainBase):
+    pass
+
+
+@set_rt_mode_ann()
+class _DelayChainANN(_DelayChainBase):
+    pass
+
+
 @set_rt_mode_snn()
 class _SpikingPool1d(FunctionalModule):
     inherent_delay = 0
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index d3ff7c2f..ea2bf896 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -11,11 +11,12 @@
 from paibox.exceptions import PAIBoxDeprecationWarning, ShapeError
 from paibox.network import DynSysGroup
 from paibox.types import (
+    LEAK_V_DTYPE,
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
-    DataArrayType,
+    WEIGHT_DTYPE,
     IntScalarType,
-    LeakVType,
+    DataType,
     NeuOutType,
     VoltageType,
     WeightType,
@@ -30,6 +31,7 @@
     FunctionalModule2to1WithV,
     TransposeModule,
     set_rt_mode,
+    set_rt_mode_ann,
     set_rt_mode_snn,
 )
 from .neuron import Neuron
@@ -50,7 +52,6 @@
     "BitwiseNOT",
     "BitwiseOR",
     "BitwiseXOR",
-    "DelayChain",
     "SpikingAdd",
     "SpikingAvgPool1d",
     "SpikingAvgPool1dWithV",
@@ -347,95 +348,6 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-class DelayChain(FunctionalModule):
-    def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        chain_level: int = 1,
-        *,
-        keep_shape: bool = True,
-        name: Optional[str] = None,
-        **kwargs,
-    ) -> None:
-        """Delay chain. It will add extra neurons (and identity synapses) as buffer.
-
-        Args:
-            - neuron: the target neuron to be delayed.
-            - chain_level: the level of delay chain.
-
-        NOTE: the inherent delay of the module depends on `chain_level`.
-        """
-        if keep_shape:
-            shape_out = neuron.shape_out
-        else:
-            shape_out = (neuron.num_out,)
-
-        self.chain_level = arg_check_pos(chain_level, "chain level")
-        self.inherent_delay = chain_level - 1
-
-        super().__init__(
-            neuron,
-            shape_out=shape_out,
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
-        )
-
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        return x1
-
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        n_delaychain = NodeList()
-        s_delaychain = NodeList()
-
-        # Delay chain of length #D.
-        for i in range(self.chain_level - 1):
-            n_delay = SpikingRelu(
-                self.shape_out,
-                tick_wait_start=self.tick_wait_start + i,
-                tick_wait_end=self.tick_wait_end,
-                delay=1,
-                name=f"n{i}_{self.name}",
-                **self.rt_mode_kwds,
-            )
-            n_delaychain.append(n_delay)
-
-        # delay = delay_relative for output neuron
-        n_out = SpikingRelu(
-            self.shape_out,
-            tick_wait_start=self.tick_wait_start + i + 1,
-            tick_wait_end=self.tick_wait_end,
-            delay=self.delay_relative,
-            name=f"n{i + 1}_{self.name}",
-            **self.rt_mode_kwds,
-        )
-        n_delaychain.append(n_out)  # Must append to the last.
-
-        syn_in = FullConnSyn(
-            self.module_intf.operands[0],
-            n_delaychain[0],
-            1,
-            conn_type=ConnType.One2One,
-            name=f"s0_{self.name}",
-        )
-
-        for i in range(self.chain_level - 1):
-            s_delay = FullConnSyn(
-                n_delaychain[i],
-                n_delaychain[i + 1],
-                1,
-                conn_type=ConnType.One2One,
-                name=f"s{i + 1}_{self.name}",
-            )
-
-            s_delaychain.append(s_delay)
-
-        generated = [*n_delaychain, syn_in, *s_delaychain]
-        self._rebuild_out_intf(network, n_out, *generated, **build_options)
-
-        return generated
-
-
 @set_rt_mode_snn()
 class SpikingAdd(FunctionalModule2to1WithV):
     inherent_delay = 0
diff --git a/paibox/types.py b/paibox/types.py
index f963f13e..65a2df99 100644
--- a/paibox/types.py
+++ b/paibox/types.py
@@ -15,9 +15,10 @@
 IntScalarType = TypeVar("IntScalarType", int, np.bool_, np.integer)
 DataType = TypeVar("DataType", int, np.bool_, np.integer, np.ndarray)
 
-LEAK_V_DTYPE = np.int32
 SPIKE_DTYPE = np.bool_
 VOLTAGE_DTYPE = np.int32
+LEAK_V_DTYPE = VOLTAGE_DTYPE
+THRES_V_DTYPE = VOLTAGE_DTYPE
 WEIGHT_DTYPE = np.int8
 NEUOUT_SPIKE_DTYPE = np.bool_
 NEUOUT_U8_DTYPE = np.uint8

From c5cb3bc9af5856a478c242988fcb73bd65551e71 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:13:24 +0800
Subject: [PATCH 056/187] =?UTF-8?q?=F0=9F=8E=A8=20format=20&=20type=20chec?=
 =?UTF-8?q?k=20improved?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py                       | 18 +++++++---
 paibox/base.py                           |  9 +++++
 paibox/components/functional.py          | 11 +++---
 paibox/components/synapses/base.py       | 29 +++++++++-------
 paibox/components/synapses/conv_utils.py | 16 +++++----
 paibox/network.py                        | 43 ++++++++++++------------
 6 files changed, 77 insertions(+), 49 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index 261272e8..88abc987 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -1,5 +1,6 @@
 from importlib.metadata import version
 
+# Backend context & mapper
 from .backend import BACKEND_CONFIG as BACKEND_CONFIG
 from .backend import Mapper as Mapper
 
@@ -9,7 +10,8 @@
 from .components.functional import BitwiseNOT as BitwiseNOT
 from .components.functional import BitwiseOR as BitwiseOR
 from .components.functional import BitwiseXOR as BitwiseXOR
-
+from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
+from .components.functional import LinearSemiFolded as LinearSemiFolded
 from .components.functional import Filter as Filter
 from .components.functional import Linear as Linear
 from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
@@ -43,14 +45,22 @@
 from .components.synapses.synapses import ConvTranspose2d as ConvTranspose2d
 from .components.synapses.synapses import FullConn as FullConn
 from .components.synapses.synapses import MatMul2d as MatMul2d
-from .context import FRONTEND_ENV as FRONTEND_ENV
+
+# Network
 from .network import DynSysGroup as DynSysGroup
 from .network import Network as Network
-from .node import NodeDict as NodeDict
-from .node import NodeList as NodeList
+
+# Simulation
 from .simulator import Probe as Probe
 from .simulator import Simulator as Simulator
 
+# Frontend context
+from .context import FRONTEND_ENV as FRONTEND_ENV
+
+# Auxiliary containers
+from .node import NodeDict as NodeDict
+from .node import NodeList as NodeList
+
 try:
     __version__ = version("paibox")
 except Exception:
diff --git a/paibox/base.py b/paibox/base.py
index f0dc8c1a..31e25387 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -257,6 +257,15 @@ def state(self) -> NodeDict:
 
 
 class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
+
+    _delay: int
+    _tws: int
+    """tick_wait_start"""
+    _twe: int
+    """tick_wait_end"""
+    _uf: int
+    """unrolling_factor"""
+
     def __init__(self, name: Optional[str] = None) -> None:
         super().__init__(name)
         self.master_nodes = NodeDict()
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index ea2bf896..24da5434 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -64,7 +64,6 @@
     "Transpose3d",
     "Conv2dSemiMap",
     "Filter",
-    "Delay_FullConn",
     "Linear",
     "MaxPool2dSemiMap",
     "AvgPool2dSemiMap",
@@ -732,11 +731,13 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 
 @deprecated(
-    "'Transpose2d' will be removed in a future version. Use 'MatMul2d' instead.",
+    "'Transpose2d' will be removed in version 1.2.0. Use 'MatMul2d' instead.",
     category=PAIBoxDeprecationWarning,
 )
 @set_rt_mode_snn()
 class Transpose2d(TransposeModule):
+    inherent_delay = 0
+
     def __init__(
         self,
         neuron: Union[NeuDyn, InputProj],
@@ -792,11 +793,13 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 
 @deprecated(
-    "'Transpose3d' will be removed in a future version. Use 'MatMul2d' instead.",
+    "'Transpose3d' will be removed in version 1.2.0. Use 'MatMul2d' instead.",
     category=PAIBoxDeprecationWarning,
 )
 @set_rt_mode_snn()
 class Transpose3d(TransposeModule):
+    inherent_delay = 0
+
     def __init__(
         self,
         neuron: Union[NeuDyn, InputProj],
@@ -1570,7 +1573,7 @@ def _delay_mapping(h: int, cin: int, n: int) -> WeightType:
     #     for j in range(n * cin):
     #         for k in range(h):
     #             mt[i * h + k, j * h + k] = 1
-    mt = np.eye(cin * h, dtype=np.int8)
+    mt = np.eye(cin * h, dtype=WEIGHT_DTYPE)
     return mt
 
 
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 3ba2d72a..b056d052 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -272,7 +272,7 @@ def __init__(
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
         if (_output_size := out_channels * out_l) != dest.num_in:
-            raise ShapeError(f"Output size mismatch: {_output_size} != {dest.num_in}.")
+            raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
         self.comm = Conv1dForward((in_l,), (out_l,), _kernel, stride, padding)
 
@@ -319,7 +319,7 @@ def __init__(
 
         if (_output_size := out_channels * out_h * out_w) != dest.num_in:
             raise ShapeError(
-                f"Output size mismatch: {_output_size} ({out_channels}*{out_h}*{out_w}) "
+                f"output size mismatch: {_output_size} ({out_channels}*{out_h}*{out_w}) "
                 f"!= {dest.num_in}."
             )
 
@@ -338,23 +338,26 @@ def __init__(
         kernel: np.ndarray,
         stride: tuple[int, int],
         padding: tuple[int, int],
-        order: _KOrder4d = "OIHW",
+        order: _KOrder3d,
         name: Optional[str] = None,
     ) -> None:
         super().__init__(source, dest, name)
-        # print("进入halfroll")
-        if order == "IOHW":
+
+        if kernel.ndim != self._spatial_ndim + 2:
+            raise ShapeError(
+                f"convolution kernel dimension must be {self._spatial_ndim + 2}, but got {kernel.ndim}."
+            )
+
+        if order == "IOL":
             _kernel = np.swapaxes(kernel, 0, 1)
         else:
             _kernel = kernel.copy()
 
-        # O,I,H,W
+        # O,I,H
         out_channels, in_channels, kernel_h = _kernel.shape
-        # C,H,W
-        if len(source.shape_out) == 2:
-            in_ch, in_h = source.shape_out
-        else:
-            in_ch, in_h, in_w = _fm_ndim2_check(source.shape_out, "CHW")
+        # I,H
+        assert len(source.shape_out) == 2
+        in_ch, in_h = source.shape_out
         out_h = (in_h + 2 * padding[0] - kernel_h) // stride[0] + 1
 
         if in_ch != in_channels:
@@ -414,7 +417,7 @@ def __init__(
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
         if (_output_size := out_channels * out_l) != dest.num_in:
-            raise ShapeError(f"Output size mismatch: {_output_size} != {dest.num_in}.")
+            raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
         self.comm = ConvTranspose1dForward(
             (in_l,), (out_l,), _kernel, stride, padding, output_padding
@@ -471,7 +474,7 @@ def __init__(
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
         if (_output_size := out_channels * out_h * out_w) != dest.num_in:
-            raise ShapeError(f"Output size mismatch: {_output_size} != {dest.num_in}.")
+            raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
         self.comm = ConvTranspose2dForward(
             (in_h, in_w), (out_h, out_w), _kernel, stride, padding, output_padding
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 9c96cb58..9916410c 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -192,9 +192,9 @@ def _conv2d_semifolded_unroll(
 ) -> WeightType:
     cout, cin, kh = kernel.shape
     ih = in_shape[1] + 2 * padding[0]
-    # ih = in_shape[1]
-    o_ch, oh = out_shape
+    _, oh = out_shape
     w_np = np.zeros((cin * ih, cout * oh), dtype=kernel.dtype)
+
     for i in range(cout):
         for j in range(cin):
             if padding[0] == 0:
@@ -209,11 +209,13 @@ def _conv2d_semifolded_unroll(
                     w_np[
                         j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
                     ] = kernel[i, j, :]
-            w_np = np.delete(
-                w_np,
-                np.concatenate((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
-                axis=0,
-            )
+
+                w_np = np.delete(
+                    w_np,
+                    np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
+                    axis=0,
+                )
+
     return w_np
 
 
diff --git a/paibox/network.py b/paibox/network.py
index 04f41c12..843ff443 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -1,7 +1,6 @@
 from typing import Optional, Union
-
+import sys
 import numpy as np
-from typing_extensions import TypeAlias
 
 from .base import DynamicSys, SynSys
 from .collector import Collector
@@ -10,6 +9,13 @@
 from .mixin import Container
 from .node import NodeDict
 
+
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+
+
 __all__ = ["DynSysGroup", "Network"]
 
 
@@ -73,33 +79,28 @@ def __call__(self, **kwargs) -> None:
     def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
-        try:
-            from .components.functional import (
-                AvgPool2dSemiMap,
-                Conv2dSemiMap,
-                Delay_FullConn,
-                MaxPool2dSemiMap,
-            )
-        except ImportError:
-            Conv2dSemiMap, Delay_FullConn = None
+        from .components.functional import (
+            AvgPool2dSemiMap,
+            Conv2dSemiFolded,
+            LinearSemiFolded,
+            MaxPool2dSemiMap,
+        )
+
         generated = dict()
         modules = network.nodes().subset(NeuModule).unique()
         delay = 1
         for module in modules.values():
-            if Conv2dSemiMap is not None and isinstance(module, Conv2dSemiMap):
+            if isinstance(module, Conv2dSemiFolded):
                 generated[module] = module.build(network, delay, **build_options)
-                if module.stride[1] != 1:
-                    delay = delay * module.stride[1]
-            elif Delay_FullConn is not None and isinstance(module, Delay_FullConn):
+                delay *= module.stride[1]  # stride of w > 1
+            elif isinstance(module, LinearSemiFolded):
                 generated[module] = module.build(network, delay, **build_options)
-            elif MaxPool2dSemiMap is not None and isinstance(module, MaxPool2dSemiMap):
+            elif isinstance(module, MaxPool2dSemiMap):
                 generated[module] = module.build(network, delay, **build_options)
-                if module.stride[1] != 1:
-                    delay = delay * module.stride[1]
-            elif AvgPool2dSemiMap is not None and isinstance(module, AvgPool2dSemiMap):
+                delay *= module.stride[1]
+            elif isinstance(module, AvgPool2dSemiMap):
                 generated[module] = module.build(network, delay, **build_options)
-                if module.stride[1] != 1:
-                    delay = delay * module.stride[1]
+                delay *= module.stride[1]
             else:
                 generated[module] = module.build(network, **build_options)
 

From b7cb5516e8db557a3edb7bd5cb00bc3e7891789e Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:23:12 +0800
Subject: [PATCH 057/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20`Linear?=
 =?UTF-8?q?`=20&=20`LinearSemiFolded`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py   |  43 ++++++++++
 paibox/components/functional.py | 136 +++++++++-----------------------
 2 files changed, 81 insertions(+), 98 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index d30b898c..870bc3a3 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -154,6 +154,49 @@ class _DelayChainANN(_DelayChainBase):
     pass
 
 
+class _HasSemiFoldedIntf(Protocol):
+    """The front of this module has replication & delay interface for semi-folded convolution."""
+
+    def build(
+        self, network: DynSysGroup, delay: int, **build_options
+    ) -> BuiltComponentType: ...
+
+
+@set_rt_mode_ann()
+class _LinearBase(FunctionalModule):
+    def __init__(
+        self,
+        neuron_s: Union[NeuDyn, InputProj],
+        out_features: Shape,
+        weights: np.ndarray,
+        bias: DataType = 0,
+        bit_trunc: int = 8,
+        *,
+        conn_type: ConnType = ConnType.All2All,
+        keep_shape: bool = False,
+        name: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        self.weights = weights
+        self.conn_type = conn_type
+        self.bit_trunc = bit_trunc
+
+        if isinstance(bias, np.ndarray):
+            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
+        else:
+            _bias = int(bias)
+
+        self.bias = _bias
+
+        super().__init__(
+            neuron_s,
+            shape_out=as_shape(out_features),
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
+        )
+
+
 @set_rt_mode_snn()
 class _SpikingPool1d(FunctionalModule):
     inherent_delay = 0
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 24da5434..6d7557cc 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -35,6 +35,7 @@
     set_rt_mode_snn,
 )
 from .neuron import Neuron
+from .neuron.base import MetaNeuron
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
@@ -859,99 +860,65 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(8, 8, 0)
-class Delay_FullConn(FunctionalModule):
-    "That operator is used on the first fully connected layer after the semimap-convolution."
-
-    def __init__(
-        self,
-        neuron_s: Union[NeuDyn, InputProj],
-        out_feature: tuple[int, ...],
-        weights: DataArrayType = 1,
-        bias: Union[int, LeakVType] = 0,
-        conn_type: ConnType = ConnType.MatConn,
-        keep_shape: bool = False,
-        name: Optional[str] = None,
-        **kwargs,
-    ) -> None:
-        # self.delay =
-        self.weights = weights
-        self.conn_type = conn_type
-        self.bias = bias
-        _shape_out = out_feature
-        super().__init__(
-            neuron_s,
-            # neuron_d,
-            shape_out=_shape_out,
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
-        )
+class LinearSemiFolded(_LinearBase, _HasSemiFoldedIntf):
+    "That operator is used on the first fully-connected layer after the semi-folded convolution."
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        output = x1 @ self.weights
-        return output
+        raise NotImplementedError
 
     def build(
         self, network: DynSysGroup, delay: int, **build_options
     ) -> BuiltComponentType:
-        if len(self.module_intf.operands[0].shape_out) != 2:
-            raise ShapeError(
-                "The source node must be a successor to the half-convolution"
-            )
+        assert len(self.module_intf.operands[0].shape_out) == 2
+
         delay_shape = self.module_intf.operands[0].shape_out
-        delay_neurons = []
-        neuron_d = Neuron(
+        n_delays = NodeList()
+        s_delays = NodeList()
+        s_weight = NodeList()
+
+        n_fc = ANNNeuron(
             self.shape_out,
-            reset_mode=RM.MODE_NONRESET,
-            neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=self.bias,
-            neg_threshold=0,
-            pos_threshold=0,
+            self.bias,
+            self.bit_trunc,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=self.tick_wait_end,
-            input_width=self.input_width,
-            spike_width=self.spike_width,
-            snn_en=self.snn_en,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+
         for i in range(delay_shape[1]):
-            neuron = Neuron(
+            neuron = ANNBypassNeuron(
                 shape=delay_shape,
-                leak_v=0,
-                neg_threshold=0,
                 delay=delay * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
-                input_width=self.input_width,
-                spike_width=self.spike_width,
-                snn_en=self.snn_en,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
-            delay_neurons.append(neuron)
-            # 延时突触
+            n_delays.append(neuron)
+            # Delay synapses
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
-                delay_neurons[i],
+                neuron,
                 weights=_delay_mapping(delay_shape[1], delay_shape[0], 1),
                 conn_type=ConnType.All2All,
-                name=f"s{i}_delay",
+                name=f"s{i}_delay_{self.name}",
             )
-            # w = np.zeros((neuron.num_out, self.module_intf.operands[1].num_out))
+            s_delays.append(syn1)
+
             w = self.weights[delay_shape[1] - i - 1 :: delay_shape[1], :]
-            syn2 = FullConnSyn(  # cin,(kw-1)*ih -> cout * oh
-                delay_neurons[i],  # 54 -> 54
-                neuron_d,
+            syn2 = FullConnSyn(
+                neuron,
+                n_fc,
                 weights=w,
                 conn_type=self.conn_type,
                 name=f"s{i}_{self.name}",
             )
+            s_weight.append(syn2)
 
-            generated = [neuron_d, *delay_neurons, syn1, syn2]
-            self._rebuild_out_intf(network, neuron_d, *generated, **build_options)
+        generated = [n_fc, *n_delays, *s_delays, *s_weight]
+        self._rebuild_out_intf(network, n_fc, *generated, **build_options)
 
         return generated
 
@@ -1155,53 +1122,26 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-@set_rt_mode(8, 8, 0)
-class Linear(FunctionalModule):
-    "FullConn for ANN mode"
+class Linear(_LinearBase):
+    "Linear layer for ANN."
 
-    def __init__(
-        self,
-        neuron_s: Union[NeuDyn, InputProj],
-        out_feature: tuple[int, ...],
-        weights: DataArrayType = 1,
-        bias: Union[int, LeakVType] = 0,
-        conn_type: ConnType = ConnType.MatConn,
-        keep_shape: bool = False,
-        name: Optional[str] = None,
-        **kwargs,
-    ) -> None:
-        self.weights = weights
-        self.conn_type = conn_type
-        self.bias = bias
-        _shape_out = out_feature
-        super().__init__(
-            neuron_s,
-            shape_out=_shape_out,
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
-        )
+    inherent_delay = 0
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        output = x1.ravel() @ self.weights
+        output = x1 @ self.weights.astype(VOLTAGE_DTYPE)
         output = output + self.bias
-        output[output < 0] = 0
-        return output
+        output = np.where(output >= 1, MetaNeuron._truncate(output, self.bit_trunc), 0)
+
+        return output.astype(NEUOUT_U8_DTYPE)
 
-    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
-        neuron_d = Neuron(
+    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+        neuron_d = ANNNeuron(
             self.shape_out,
-            reset_mode=RM.MODE_NONRESET,
-            neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=self.bias,
-            neg_threshold=0,
-            pos_threshold=0,
+            self.bias,
+            self.bit_trunc,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start,
             tick_wait_end=self.tick_wait_end,
-            input_width=self.input_width,
-            spike_width=self.spike_width,
-            snn_en=self.snn_en,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )

From 26dbb9cc52989adb815bc8739a85ad6eb85b6cd4 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:24:15 +0800
Subject: [PATCH 058/187] =?UTF-8?q?=E2=9C=A8=20update=20`Conv2dSemiFolded`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/functional.py | 119 +++++++++++++++-----------------
 1 file changed, 55 insertions(+), 64 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 6d7557cc..51e99b92 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -39,7 +39,7 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, FullConnSyn
+from .synapses import ConnType, FullConnSyn, Conv2dSemiFoldedSyn
 from .synapses.conv_types import _Size1Type, _Size2Type
 from .synapses.conv_utils import _fm_ndim2_check, _pair
 
@@ -63,7 +63,7 @@
     "SpikingSub",
     "Transpose2d",
     "Transpose3d",
-    "Conv2dSemiMap",
+    "Conv2dSemiFolded",
     "Filter",
     "Linear",
     "MaxPool2dSemiMap",
@@ -923,130 +923,121 @@ def build(
         return generated
 
 
-@set_rt_mode(8, 8, 0)
-class Conv2dSemiMap(FunctionalModule):
+@set_rt_mode_ann()
+class Conv2dSemiFolded(FunctionalModule, _HasSemiFoldedIntf):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
         self,
         neuron_s: Union[NeuDyn, InputProj],
-        # neuron_d: Union[NeuDyn, InputProj],
         kernel: np.ndarray,
-        stride: Optional[_Size2Type] = None,
+        stride: _Size2Type = 1,
         padding: _Size2Type = 0,
-        bias: Union[int, LeakVType] = 0,
+        bias: DataType = 0,
+        bit_trunc: int = 8,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """2d conv_semimap for ANN mode."""
-        self.kernel = kernel
-        self.stride = _pair(stride)
-        self.padding = _pair(padding)
-        self.bias = bias
+        """2d semi-folded convolution for ANN mode."""
         if kernel.ndim != self._spatial_ndim + 2:
             raise ShapeError(
                 f"convolution kernel dimension must be {self._spatial_ndim + 2}, but got {kernel.ndim}."
             )
 
-        if len(neuron_s.shape_out) != 2:
-            in_ch, in_h, in_w = neuron_s.shape_out
-        #     in_ch, in_h, in_w = _fm_ndim2_check(neuron_s.shape_out, "CHW")
-        #     neuron_s.shape_change((in_ch, in_h))
+        self.kernel = kernel
+        self.stride = _pair(stride)
+        self.padding = _pair(padding)
+        self.bit_trunc = bit_trunc
+
+        if isinstance(bias, np.ndarray):
+            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
         else:
-            (
-                in_ch,
-                in_h,
-            ) = neuron_s.shape_out
-        cout, cin, kh, kw = kernel.shape
+            _bias = int(bias)
+
+        self.bias = _bias
+
+        assert len(neuron_s.shape_out) == 2
+        in_ch, in_h = neuron_s.shape_out
+        # XXX Do not consider the case when the shape of source neurons needs to be changed, for now.
+        # neuron_s.shape_change((in_ch, in_h))
+
+        cout, cin, kh, _ = kernel.shape
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
+
         if in_ch != cin:
             raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
 
         _shape_out = (cout, out_h)
 
         super().__init__(
-            neuron_s,
-            # neuron_d,
-            shape_out=_shape_out,
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
+            neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        print("进入conv.spike_func")
-        # print(x1)
-        # output = _conv2d_faster_fp32(x1, self.kernel, self.stride, self.padding)
-        # output[output < 0] = 0
-        return  # output
+        raise NotImplementedError
 
     def build(
         self, network: DynSysGroup, delay: int, **build_options
     ) -> BuiltComponentType:
-        # print("进入build")
-        if len(self.module_intf.operands[0].shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(
-                self.module_intf.operands[0].shape_out, "CHW"
-            )
-            self.module_intf.operands[0].shape_change((in_ch, in_h))
-        in_ch, in_h = self.module_intf.operands[0].shape_out
-        cout, cin, kh, kw = self.kernel.shape
+        assert len(self.module_intf.operands[0].shape_out) == 2
+        # if len(self.module_intf.operands[0].shape_out) != 2:
+        #     in_ch, in_h, in_w = _fm_ndim2_check(
+        #         self.module_intf.operands[0].shape_out, "CHW"
+        #     )
+        #     self.module_intf.operands[0].shape_change((in_ch, in_h))
+
+        _, in_h = self.module_intf.operands[0].shape_out
+        _, cin, _, kw = self.kernel.shape
+
         n_delays = NodeList()
         s_delays = NodeList()
-        relu = Neuron(
+        s_kernel = NodeList()
+
+        n_conv2d = ANNNeuron(
             self.shape_out,
-            reset_mode=RM.MODE_NONRESET,
-            neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=self.bias,
-            neg_threshold=0,
-            pos_threshold=0,
+            self.bias,
+            self.bit_trunc,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=self.tick_wait_end,
-            input_width=self.input_width,
-            spike_width=self.spike_width,
-            snn_en=self.snn_en,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+
         for i in range(kw):
-            neuron = Neuron(
+            neuron = ANNBypassNeuron(
                 (cin, in_h),
-                leak_v=0,
-                neg_threshold=0,
                 delay=delay * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
-                input_width=self.input_width,
-                spike_width=self.spike_width,
-                snn_en=self.snn_en,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
             n_delays.append(neuron)
-            # 延时突触
+            # delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],  # (2, 5)
+                self.module_intf.operands[0],
                 n_delays[i],
                 weights=_delay_mapping(in_h, cin, 1),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
-            syn2 = Conv2dHalfRollSyn(  # cin, ih -> cout * oh
-                n_delays[i],
-                relu,
+
+            syn2 = Conv2dSemiFoldedSyn(  # cin, ih -> cout * oh
+                neuron,
+                n_conv2d,
                 kernel=self.kernel[:, :, :, kw - i - 1],
                 stride=self.stride,
                 padding=self.padding,
-                order="OIHW",
+                order="OIL",
                 name=f"s{i}_{self.name}",
             )
-            s_delays.append(syn2)
+            s_kernel.append(syn2)
 
-        generated = [relu, *n_delays, *s_delays]
-        self._rebuild_out_intf(network, relu, *generated, **build_options)
+        generated = [n_conv2d, *n_delays, *s_delays, *s_kernel]
+        self._rebuild_out_intf(network, n_conv2d, *generated, **build_options)
 
         return generated
 

From e1ed86710de442c496c09f598ab01105733c6353 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:25:04 +0800
Subject: [PATCH 059/187] =?UTF-8?q?=E2=9C=85=20update=20test=20cases,=20sy?=
 =?UTF-8?q?nc=20changes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/shared_networks.py | 44 +++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 401f8341..8cd7ad5c 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -253,30 +253,48 @@ def __init__(self, shape, axes):
         self.probe2 = pb.Probe(self.n2, "spike")
 
 
-class Conv2dSemiMap_Net1(pb.DynSysGroup):
-    def __init__(self, shape, kernel, stride, padding):
+class Conv2dSemiFolded_1Layer(pb.DynSysGroup):
+    def __init__(self, shape, kernel, stride, padding, bias):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiMap(
-            self.i1, kernel, stride[0], padding[0], tick_wait_start=1
+        self.conv1 = pb.Conv2dSemiFolded(
+            self.i1, kernel, stride[0], padding[0], bias=bias, tick_wait_start=1
         )
+        
 
+class Conv2dSemiFolded_FC_Net1(pb.DynSysGroup):
+    def __init__(self, shape, kernel, stride, padding, out_features, weight):
+        super().__init__()
 
-class Conv2dSemiMap_Net2(pb.DynSysGroup):
-    def __init__(self, shape, kernel, stride, padding, out_feature, weight):
+        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
+        self.conv1 = pb.Conv2dSemiFolded(
+            self.i1, kernel, stride, padding, tick_wait_start=1
+        )
+        self.linear1 = pb.LinearSemiFolded(
+            self.conv1,
+            out_features,
+            weights=weight,
+            bias=0,
+            conn_type=pb.SynConnType.All2All,
+            tick_wait_start=self.conv1.tick_wait_start + 2,
+        )
+
+
+class Conv2dSemiFolded_FC_Net2(pb.DynSysGroup):
+    def __init__(self, shape, kernel, stride, padding, out_features, weight):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiMap(
+        self.conv1 = pb.Conv2dSemiFolded(
             self.i1, kernel, stride[0], padding[0], tick_wait_start=1
         )
-        self.conv2 = pb.Conv2dSemiMap(
+        self.conv2 = pb.Conv2dSemiFolded(
             self.conv1, kernel, stride[1], padding[1], tick_wait_start=3
         )
-        self.linear1 = pb.DelayFullConn(
+        self.linear1 = pb.LinearSemiFolded(
             self.conv2,
-            out_feature,
+            out_features,
             weights=weight,
             bias=0,
             conn_type=pb.SynConnType.All2All,
@@ -302,7 +320,7 @@ def __init__(self, shape, kernel_size, stride, weight, pool_type):
             self.pool2 = pb.MaxPool2dSemiMap(
                 self.pool1, kernel_size, stride[1], tick_wait_start=3
             )
-        self.linear1 = pb.DelayFullConn(
+        self.linear1 = pb.LinearSemiFolded(
             self.pool2,
             2,
             weights=weight,
@@ -316,9 +334,7 @@ class Linear_Net(pb.DynSysGroup):
     def __init__(self, shape, weight1):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.linear1 = pb.Linear(
-            self.i1, 10, weights=weight1, bias=2, conn_type=pb.SynConnType.All2All
-        )
+        self.linear1 = pb.Linear(self.i1, 10, weights=weight1, bias=2)
         self.probe1 = pb.Probe(self.linear1, "spike")
 
 

From 867a6bd3f2b1c2390cd00c3c435c03c86f2bc2ad Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:28:17 +0800
Subject: [PATCH 060/187] =?UTF-8?q?=E2=9C=85=20parameterize=20`N=5FTEST`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 161 +++++++++++++++-------------
 1 file changed, 86 insertions(+), 75 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 1cc25221..450a34a9 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -33,21 +33,24 @@ def _assert_build_fmodule(
     assert len(nodes) == n_node_aft_build
 
 
+N_TEST = 20
+
+
 class TestFunctionalModules:
     def test_FModule_ConnWithInput(self, build_FModule_ConnWithInput_Net):
         net = build_FModule_ConnWithInput_Net
         bitwise = 10
         sim = pb.Simulator(net, start_time_zero=False)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
-        inpb = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
+        inpb = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
         # data2 will input to inp2 which is connected with the AND module.
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i], data2=inpb[i])
             sim.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim.data[net.probe2][i], inpa[i - 1] & inpb[i])
 
         _assert_build_fmodule(net, 4 + 1 + 2, 4 + 3 + 2)
@@ -57,20 +60,20 @@ def test_FModule_ConnWithModule(self, build_FModule_ConnWithModule_Net):
         bitwise = 10
         sim = pb.Simulator(net, start_time_zero=False)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
-        inpb = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
-        inpc = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
+        inpb = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
+        inpc = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
-        for t in range(20):
+        for t in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[t], data2=inpb[t], data3=inpc[t])
             sim.run(1)
 
         # At T >= 1, the AND1 will output the valid result.
-        for t in range(1, 20):
+        for t in range(1, N_TEST):
             assert np.array_equal(sim.data[net.probe2][t], inpa[t - 1] & inpb[t - 1])
 
         # At T >= 2, the OR1 will output the valid result.
-        for t in range(2, 20):
+        for t in range(2, N_TEST):
             assert np.array_equal(
                 sim.data[net.probe3][t], (inpa[t - 2] & inpb[t - 2]) | inpc[t - 1]
             )
@@ -99,19 +102,19 @@ def test_BitwiseAND(self):
         probe_func = pb.Probe(generated[func][0], "spike")
         sim2.add_probe(probe_func)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
-        inpb = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
+        inpb = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i], data2=inpb[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], inpa[i - 1] & inpb[i - 1])
             assert np.array_equal(sim2.data[probe_func][i], inpa[i - 1] & inpb[i - 1])
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             assert np.array_equal(sim1.data[net1.probe4][i], inpa[i - 2] & inpb[i - 2])
 
         _assert_build_fmodule(net1, 6 + 1 + 2, 6 + 3 + 2)
@@ -140,18 +143,18 @@ def test_BitwiseNOT(self):
         probe_func = pb.Probe(generated[func][0], "spike")
         sim2.add_probe(probe_func)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe2][i], ~inpa[i - 1])
             assert np.array_equal(sim2.data[probe_func][i], ~inpa[i - 1])
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], ~inpa[i - 2])
 
         _assert_build_fmodule(net1, 3 + 1 + 2, 3 + 2 + 2)
@@ -180,19 +183,19 @@ def test_BitwiseOR(self):
         probe_func = pb.Probe(generated[func][0], "spike")
         sim2.add_probe(probe_func)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
-        inpb = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
+        inpb = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i], data2=inpb[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], inpa[i - 1] | inpb[i - 1])
             assert np.array_equal(sim2.data[probe_func][i], inpa[i - 1] | inpb[i - 1])
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             assert np.array_equal(sim1.data[net1.probe4][i], inpa[i - 2] | inpb[i - 2])
 
         _assert_build_fmodule(net1, 6 + 1 + 2, 6 + 3 + 2)
@@ -221,19 +224,19 @@ def test_BitwiseXOR(self):
         probe_func = pb.Probe(generated[func][1], "spike")
         sim2.add_probe(probe_func)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
-        inpb = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
+        inpb = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i], data2=inpb[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], inpa[i - 2] ^ inpb[i - 2])
             assert np.array_equal(sim2.data[probe_func][i], inpa[i - 2] ^ inpb[i - 2])
 
-        for i in range(3, 20):
+        for i in range(3, N_TEST):
             assert np.array_equal(sim1.data[net1.probe4][i], inpa[i - 3] ^ inpb[i - 3])
 
         _assert_build_fmodule(net1, 6 + 1 + 2, 6 + 5 + 2)
@@ -248,7 +251,6 @@ def test_BitwiseXOR_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
-    @pytest.mark.skip(reason="'rt_mode_kwds' is not implemented in DelayChain.")
     def test_DelayChain(self):
         from tests.shared_networks import FunctionalModule_1to1_Net
 
@@ -263,24 +265,23 @@ def test_DelayChain(self):
         probe_func = pb.Probe(generated[func][func.chain_level - 1], "spike")
         sim2.add_probe(probe_func)
 
-        inpa = np.random.randint(0, 2, size=(20, bitwise), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST, bitwise), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
         _inh_delay = net1.func_node.inherent_delay
-        for i in range(1 + _inh_delay, 20):
+        for i in range(1 + _inh_delay, N_TEST):
             assert np.array_equal(sim1.data[net1.probe2][i], inpa[i - 1 - _inh_delay])
             assert np.array_equal(sim2.data[probe_func][i], inpa[i - 1 - _inh_delay])
 
-        for i in range(2 + _inh_delay, 20):
+        for i in range(2 + _inh_delay, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], inpa[i - 2 - _inh_delay])
 
         _assert_build_fmodule(net1, 3 + 1 + 2, 3 + 2 * net1.func_node.chain_level + 2)
 
-    @pytest.mark.skip(reason="'rt_mode_kwds' is not implemented in DelayChain.")
     def test_DelayChain_mapping(self, ensure_dump_dir):
         from tests.shared_networks import FunctionalModule_1to1_Net
 
@@ -318,16 +319,16 @@ def test_SpikingAdd(self):
         inpb = np.tile(_base_b, (10, 1)).T
         expected = np.tile(_base_expected, (10, 1)).T
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i], data2=inpb[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], expected[i])
             assert np.array_equal(sim2.data[probe_func][i], expected[i])
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             assert np.array_equal(sim1.data[net1.probe4][i], expected[i - 1])
 
         _assert_build_fmodule(net1, 6 + 1 + 2, 6 + 3 + 2)
@@ -369,16 +370,16 @@ def test_SpikingSub(self):
         inpb = np.tile(_base_b, (10, 1)).T
         expected = np.tile(_base_expected, (10, 1)).T
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i], data2=inpb[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe3][i], expected[i])
             assert np.array_equal(sim2.data[probe_func][i], expected[i])
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             assert np.array_equal(sim1.data[net1.probe4][i], expected[i - 1])
 
         _assert_build_fmodule(net1, 6 + 1 + 2, 6 + 3 + 2)
@@ -444,9 +445,11 @@ def test_SpikingPool1d(
         sim2.add_probe(probe_p1d)
 
         # Use binomial distribution to generate a sparse matrix with more zeros
-        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+        inpa = np.random.binomial(1, p_binomial, size=(N_TEST,) + fm_shape).astype(
+            np.bool_
+        )
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
@@ -458,7 +461,7 @@ def test_SpikingPool1d(
         else:
             _threshold = typical_round(shape2num(ksize) / 2)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             if pool_type == "avg":
                 expected = avgpool1d_golden(
                     inpa[i - 1], ksize, _stride, _padding, fm_order, _threshold
@@ -471,7 +474,7 @@ def test_SpikingPool1d(
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
             assert np.array_equal(sim2.data[probe_p1d][i], expected)
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             if pool_type == "avg":
                 expected = avgpool1d_golden(
                     inpa[i - 2], ksize, _stride, _padding, fm_order, _threshold
@@ -552,9 +555,11 @@ def test_SpikingPool2d(
         sim2.add_probe(probe_p2d)
 
         # Use binomial distribution to generate a sparse matrix with more zeros
-        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+        inpa = np.random.binomial(1, p_binomial, size=(N_TEST,) + fm_shape).astype(
+            np.bool_
+        )
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
@@ -566,7 +571,7 @@ def test_SpikingPool2d(
         else:
             _threshold = typical_round(shape2num(ksize) / 2)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             if pool_type == "avg":
                 expected = avgpool2d_golden(
                     inpa[i - 1], ksize, _stride, _padding, fm_order, _threshold
@@ -579,7 +584,7 @@ def test_SpikingPool2d(
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
             assert np.array_equal(sim2.data[probe_p2d][i], expected)
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             if pool_type == "avg":
                 expected = avgpool2d_golden(
                     inpa[i - 2], ksize, _stride, _padding, fm_order, _threshold
@@ -634,14 +639,16 @@ def test_SpikingAvgPool1dWithV(
         sim2.add_probe(probe_p1d)
 
         # Use binomial distribution to generate a sparse matrix with more zeros
-        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+        inpa = np.random.binomial(1, p_binomial, size=(N_TEST,) + fm_shape).astype(
+            np.bool_
+        )
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe2][i], sim2.data[probe_p1d][i])
 
     def test_SpikingAvgPool1dWithV_mapping(self, ensure_dump_dir):
@@ -694,14 +701,16 @@ def test_SpikingAvgPool2dWithV(
         sim2.add_probe(probe_p2d)
 
         # Use binomial distribution to generate a sparse matrix with more zeros
-        inpa = np.random.binomial(1, p_binomial, size=(20,) + fm_shape).astype(np.bool_)
+        inpa = np.random.binomial(1, p_binomial, size=(N_TEST,) + fm_shape).astype(
+            np.bool_
+        )
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(1, 20):
+        for i in range(1, N_TEST):
             assert np.array_equal(sim1.data[net1.probe2][i], sim2.data[probe_p2d][i])
 
     def test_SpikingAvgPool2dWithV_mapping(self, ensure_dump_dir):
@@ -729,19 +738,19 @@ def test_Transpose2d(self, shape):
         probe_t2d = pb.Probe(generated[t2d][0], "spike")
         sim2.add_probe(probe_t2d)
 
-        inpa = np.random.randint(0, 2, size=(20,) + as_shape(shape), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST,) + as_shape(shape), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             expected = inpa[i - 1].T.ravel()
             assert np.array_equal(sim1.data[net1.probe1][i], expected)
             assert np.array_equal(sim2.data[probe_t2d][i], expected)
 
-        for i in range(3, 20):
+        for i in range(3, N_TEST):
             expected = inpa[i - 2].T.ravel()
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
 
@@ -782,19 +791,19 @@ def test_Transpose3d(self, shape, axes):
         if len(shape) == 2:
             shape = (1,) + shape
 
-        inpa = np.random.randint(0, 2, size=(20,) + as_shape(shape), dtype=np.bool_)
+        inpa = np.random.randint(0, 2, size=(N_TEST,) + as_shape(shape), dtype=np.bool_)
 
-        for i in range(20):
+        for i in range(N_TEST):
             pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
-        for i in range(2, 20):
+        for i in range(2, N_TEST):
             expected = inpa[i - 1].transpose(axes).ravel()
             assert np.array_equal(sim1.data[net1.probe1][i], expected)
             assert np.array_equal(sim2.data[probe_t3d][i], expected)
 
-        for i in range(3, 20):
+        for i in range(3, N_TEST):
             expected = inpa[i - 2].transpose(axes).ravel()
             assert np.array_equal(sim1.data[net1.probe2][i], expected)
 
@@ -1015,31 +1024,33 @@ def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
             else:
                 expected = expected & ((1 << 8) - 1)
             assert np.array_equal(expected, sim1.data[probe_linear][12])
-        # print(sim1.data[probe_pool])
-        # print(sim1.data[probe_linear])
 
-    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
-        "shape, weight1",
+        "shape, weight",
         [
-            ((3, 5, 5), np.random.randint(-5, 5, size=(3 * 5 * 5, 10), dtype=np.int8)),
+            ((3, 5, 5), np.random.randint(0, 5, size=(3 * 5 * 5, 10), dtype=np.int8)),
+            ((10,), np.random.randint(0, 5, size=(10, 10), dtype=np.int8)),
         ],
     )
-    def test_Linear(self, shape, weight1):
+    def test_Linear(self, shape, weight):
         from tests.shared_networks import Linear_Net
 
-        net1 = Linear_Net(shape, weight1)
-        net2 = Linear_Net(shape, weight1)
+        net1 = Linear_Net(shape, weight)
+        net2 = Linear_Net(shape, weight)
         linear = net2.linear1
         generated = pb.DynSysGroup.build_fmodule(net2)
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
+
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim2.add_probe(probe_linear)
-        inpa = np.random.randint(0, 10, (3, 5, 5), dtype=np.int8)
-        for i in range(1):
-            pb.FRONTEND_ENV.save(data1=inpa)
+
+        inpa = np.random.randint(0, 10, (N_TEST,) + shape, dtype=np.uint8)
+
+        for i in range(N_TEST):
+            pb.FRONTEND_ENV.save(data1=inpa[i])
             sim1.run(1)
             sim2.run(1)
 
-        assert np.array_equal(sim1.data[net1.probe1][0], sim2.data[probe_linear][0])
+        for i in range(N_TEST):
+            assert np.array_equal(sim1.data[net1.probe1][i], sim2.data[probe_linear][i])

From f2bd5e39be0fab8bd1144fcfc7b3f7fcc91bc853 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Sep 2024 15:29:23 +0800
Subject: [PATCH 061/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20semi-fo?=
 =?UTF-8?q?lded=20conv2d=20test=20cases=202/3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 207 +++++++++++++++++++++-------
 1 file changed, 155 insertions(+), 52 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 450a34a9..5358ee93 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -818,64 +818,165 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.compile()
         mapper.export(fp=ensure_dump_dir)
 
-    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
-        "shape, kernel, stride, padding",
+        "ishape_chw, kshape_oihw, stride, padding, bias",
         [
-            (
-                (3, 11),
-                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-                [1, 1],
-                [0, 0],
-            ),
-            (
-                (3, 11),
-                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-                [1, 2],
-                [0, 0],
-            ),
-            (
-                (3, 11),
-                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-                [2, 1],
-                [0, 0],
-            ),
-            (
-                (3, 11),
-                np.random.randint(-2, 3, size=(1, 3, 3, 3), dtype=np.int8),
-                [2, 2],
-                [0, 0],
-            ),
+            ((3, 11, 11), (6, 3, 3, 3), 1, 0, 0),
+            ((12, 24, 24), (24, 12, 3, 3), 1, 0, 0),
+            ((16, 32, 32), (24, 16, 4, 4), 1, 0, 0),
+            ((12, 24, 24), (4, 12, 3, 3), 2, 0, 0),
+            ((12, 32, 32), (32, 12, 3, 3), 3, 0, 0),
+            ((12, 28, 28), (4, 12, 3, 3), 2, 0, 0),
         ],
     )
-    def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
-        from tests.shared_networks import Conv2dSemiMap_Net1
+    def test_Conv2dSemiFolded_1Layer(
+        self, ishape_chw, kshape_oihw, stride, padding, bias, random_fixture
+    ):
+        from tests.shared_networks import Conv2dSemiFolded_1Layer
+
+        kernel = np.random.randint(-3, 4, size=kshape_oihw, dtype=np.int8)
+        _stride = _pair(stride)
+        _padding = _pair(padding)
+        ow = (ishape_chw[-1] + 2 * _padding[1] - kshape_oihw[-1]) // _stride[1] + 1
 
-        net1 = Conv2dSemiMap_Net1(shape, kernel, stride, padding)
-        conv = net1.conv1
+        net1 = Conv2dSemiFolded_1Layer(ishape_chw[:2], kernel, _stride, _padding, bias)
+        conv2d = net1.conv1
         generated = DynSysGroup.build_fmodule(net1)
         sim1 = pb.Simulator(net1, start_time_zero=False)
-        probe_conv = pb.Probe(generated[conv][0], "output")
+
+        probe_conv = pb.Probe(generated[conv2d][0], "output")
         sim1.add_probe(probe_conv)
-        inpa = np.random.randint(0, 5, size=(3, 11, 11)).astype(np.int8)
-        inpb = np.concatenate([inpa, np.zeros((3, 10, 11))], axis=1)
-        for i in range(15):
-            pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
-            sim1.run(1)
-        expected = _conv2d_faster_fp32(
-            np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0])
+
+        n_time = 3
+        for _ in range(n_time):
+            sim1.reset()
+            inpa = np.random.randint(0, 3, size=ishape_chw, dtype=np.uint8)
+            inp_pad0 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=np.uint8
+            )
+
+            for i in range(inp_pad0.shape[-1]):
+                pb.FRONTEND_ENV.save(data1=inp_pad0[:, :, i])
+                sim1.run(1)
+
+            expected = _conv2d_faster_fp32(inpa, kernel, _stride, _padding).astype(
+                np.int32
+            )
+
+            # Truncated expected convolution result
+            expected_t = np.where(
+                expected <= 0,
+                0,
+                np.where((expected >> 8) > 0, np.uint8(255), expected & np.uint8(255)),
+            ).astype(np.uint8)
+
+            # Valid result at [kw : kw+ow]
+            for i in range(ow):
+                assert np.array_equal(
+                    expected_t[:, :, i].ravel(),
+                    sim1.data[probe_conv][
+                        generated[conv2d][0].tick_wait_start
+                        + (kshape_oihw[-1] - 1)
+                        - 1
+                        + i * _stride[1]
+                    ],
+                )
+
+    @pytest.mark.parametrize(
+        "ishape_chw, kshape_oihw, stride, padding, out_features",
+        [
+            ((3, 12, 12), (12, 3, 3, 3), 1, 0, (10,)),
+            ((8, 12, 12), (16, 8, 3, 3), 1, 0, (10,)),
+            ((4, 12, 12), (8, 4, 3, 3), 1, 0, (4, 2)),
+            ((4, 24, 24), (8, 4, 3, 3), 2, 0, 10),
+            ((12, 12, 12), (6, 12, 3, 3), 1, 0, (3, 3)),
+            ((4, 24, 24), (8, 4, 4, 4), 2, 0, (10,)),  # corner case
+        ],
+    )
+    def test_Conv2dSemiFolded_FC_Net1(
+        self, ishape_chw, kshape_oihw, stride, padding, out_features, random_fixture
+    ):
+        from tests.shared_networks import Conv2dSemiFolded_FC_Net1
+
+        kernel = np.random.randint(-3, 4, size=kshape_oihw, dtype=np.int8)
+        _stride = _pair(stride)
+        _padding = _pair(padding)
+        oc = kshape_oihw[0]
+        oh = (ishape_chw[1] + 2 * _padding[0] - kshape_oihw[2]) // _stride[0] + 1
+        ow = (ishape_chw[2] + 2 * _padding[1] - kshape_oihw[3]) // _stride[1] + 1
+        fc_weight = np.random.randint(
+            -4, 5, size=(oc * oh * ow, shape2num(out_features)), dtype=np.int8
         )
-        expected = np.array(expected, dtype=np.int32)
-        if (expected >> 8).all() > 0:
-            expected = np.full_like(expected, ((1 << 8) - 1))
-        else:
-            expected = expected & ((1 << 8) - 1)
-        # print(expected)
-        # print(sim1.data[probe_conv])
+
+        net1 = Conv2dSemiFolded_FC_Net1(
+            ishape_chw[:2], kernel, stride, padding, out_features, fc_weight
+        )
+        conv2d = net1.conv1
+        linear = net1.linear1
+        generated = DynSysGroup.build_fmodule(net1)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
+
+        probe_conv = pb.Probe(generated[conv2d][0], "output")
+        probe_linear = pb.Probe(generated[linear][0], "output")
+        sim1.add_probe(probe_conv)
+        sim1.add_probe(probe_linear)
+
+        n_time = 3
+        for _ in range(n_time):
+            sim1.reset()
+            inpa = np.random.randint(0, 3, size=ishape_chw, dtype=np.uint8)
+            inp_pad0 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=np.uint8
+            )
+
+            for i in range(inp_pad0.shape[-1]):
+                pb.FRONTEND_ENV.save(data1=inp_pad0[:, :, i])
+                sim1.run(1)
+
+            expected = _conv2d_faster_fp32(inpa, kernel, _stride, _padding).astype(
+                np.int32
+            )
+            # Truncated expected convolution result
+            expected_t = np.where(
+                expected <= 0,
+                0,
+                np.where((expected >> 8) > 0, np.uint8(255), expected & np.uint8(255)),
+            ).astype(np.uint8)
+
+            # Check the result of semi-folded convolution.
+            # Valid result at [kw : kw+ow]
+            for i in range(ow):
+                assert np.array_equal(
+                    expected_t[:, :, i].ravel(),
+                    sim1.data[probe_conv][
+                        generated[conv2d][0].tick_wait_start
+                        + (kshape_oihw[-1] - 1)
+                        - 1
+                        + i * _stride[1]
+                    ],
+                )
+
+            expected_fc = expected_t.ravel() @ fc_weight
+            # Truncated expected linear result
+            expected_fc_t = np.where(
+                expected_fc <= 0,
+                0,
+                np.where(
+                    (expected_fc >> 8) > 0, np.uint8(255), expected_fc & np.uint8(255)
+                ),
+            ).astype(np.uint8)
+
+            # Check the result of semi-folded linear.
+            assert np.array_equal(
+                expected_fc_t,
+                sim1.data[probe_linear][
+                    generated[linear][0].tick_wait_start + (ow - 1) * _stride[1] + 1
+                ],
+            )
 
     @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
-        "shape, kernel, stride, padding, out_feature, weight",
+        "shape, kernel, stride, padding, out_features, weight",
         [
             (
                 (1, 11),
@@ -911,19 +1012,21 @@ def test_Conv2dSemiMap(self, shape, kernel, stride, padding):
             ),
         ],
     )
-    def test_Conv2dSemiMap_Net(
-        self, shape, kernel, stride, padding, out_feature, weight
+    def test_Conv2dSemiFolded_FC_Net2(
+        self, shape, kernel, stride, padding, out_features, weight
     ):
-        from tests.shared_networks import Conv2dSemiMap_Net2
+        from tests.shared_networks import Conv2dSemiFolded_FC_Net2
 
-        net2 = Conv2dSemiMap_Net2(shape, kernel, stride, padding, out_feature, weight)
-        conv = net2.conv2
+        net2 = Conv2dSemiFolded_FC_Net2(
+            shape, kernel, stride, padding, out_features, weight
+        )
+        conv2d = net2.conv2
         linear = net2.linear1
         generated = DynSysGroup.build_fmodule(net2)
         # sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
-        probe_conv = pb.Probe(generated[conv][0], "output")
+        probe_conv = pb.Probe(generated[conv2d][0], "output")
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim2.add_probe(probe_conv)
         sim2.add_probe(probe_linear)

From 95b3ea7cb2248b988f1d418078f81f7d1551bade Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 3 Sep 2024 14:53:01 +0800
Subject: [PATCH 062/187] =?UTF-8?q?=F0=9F=9A=9A=20rename=20to=20`MaxPool2d?=
 =?UTF-8?q?SemiFoldedSyn`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/synapses/__init__.py   | 2 +-
 paibox/components/synapses/base.py       | 3 +--
 paibox/components/synapses/transforms.py | 6 +++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index 0aefda77..1f6f60f0 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,2 +1,2 @@
-from .base import Conv2dSemiFoldedSyn, FullConnectedSyn, FullConnSyn, MaxPool2dSemiMapSyn
+from .base import Conv2dSemiFoldedSyn, FullConnectedSyn, FullConnSyn, MaxPool2dSemiFoldedSyn
 from .transforms import ConnType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index b056d052..4a590c41 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -481,8 +481,7 @@ def __init__(
         )
 
 
-class MaxPool2dSemiMapSyn(FullConnectedSyn):
-
+class MaxPool2dSemiFoldedSyn(FullConnectedSyn):
     def __init__(
         self,
         source: Union[NeuDyn, InputProj],
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 71d76273..8a38166a 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -592,13 +592,13 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
             y = (y1, y2, ..., ym)
         """
         if self.weights.ndim == 0:
-            output = np.full(
+            output = self.weights * np.full(
                 (self.conn_size[1],), np.max(x, axis=None), dtype=VOLTAGE_DTYPE
             )
         else:
             output = np.zeros((self.conn_size[1],), dtype=VOLTAGE_DTYPE)
             for col in range(self.conn_size[1]):
-                non_zero_idx = np.nonzero(self.weights[:, col])[0]
-                output[col] = np.max(x[non_zero_idx])
+                col_result = x * self.weights[:, col]
+                output[col] = np.max(col_result)
 
         return output

From 034c22742f1a2c12e249d3b6fb0a21a003800d9c Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 3 Sep 2024 14:53:14 +0800
Subject: [PATCH 063/187] =?UTF-8?q?=E2=9C=85=20add=20test=20for=20`=5FComp?=
 =?UTF-8?q?areMax`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/synapses/test_transforms.py | 22 +++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index e413322a..b47eb099 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -6,7 +6,7 @@
 from paibox.types import WEIGHT_DTYPE
 from paibox.utils import shape2num
 
-from ..utils import _conv1d_golden, _conv2d_golden
+from tests.components.utils import _conv1d_golden, _conv2d_golden
 
 
 class TestTransforms:
@@ -705,3 +705,23 @@ def test_ConvTranspose2dForward(
             shape2num((kernel.shape[1],) + in_shape),
             shape2num((kernel.shape[0],) + out_shape),
         )
+
+    @pytest.mark.parametrize("n_compare, n_group", [(4, 8), (9, 12), (25, 1)])
+    def test_CompareMax(self, n_compare, n_group):
+        from paibox.components.synapses.transforms import _CompareMax
+
+        n = n_compare * n_group
+        w = np.zeros((n, n_group), dtype=np.int8)
+        for i in range(n_group):
+            w[n_compare * i : n_compare * (i + 1), i] = 1
+
+        f = _CompareMax((n, n_group), w)
+
+        x = np.random.randint(0, 256, size=(n_compare, n_group), dtype=np.uint8)
+        y1 = f(x.ravel(order="F"))  # flatten in column-major order
+        expected = np.zeros((n_group,), dtype=np.int32)
+
+        for i in range(n_group):
+            expected[i] = np.max(x[:, i])
+
+        assert np.array_equal(y1, expected)

From 4e0b3d8b3217ead63065b2f838131d4fb0b36591 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 3 Sep 2024 15:56:50 +0800
Subject: [PATCH 064/187] =?UTF-8?q?=E2=9C=A8=20add=20check=20for=20max=20p?=
 =?UTF-8?q?ooling=20&=20core=20mode?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/neuron/base.py | 20 ++++++++++++++------
 paibox/mixin.py                  |  2 +-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index e68167a2..a35fa8b5 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -21,7 +21,7 @@
 )
 
 from paibox.base import NeuDyn
-from paibox.exceptions import PAIBoxWarning, ShapeError
+from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
@@ -93,10 +93,17 @@ def __init__(
             "spike_width": spike_width,
             "snn_en": snn_en,
         }
-        self.pool_max = pool_max
         # check whether the mode is valid
         self.mode = get_core_mode(input_width, spike_width, snn_en)
 
+        if pool_max == True and self.mode != CoreMode.MODE_ANN:
+            raise NotSupportedError(
+                f"max pooling is only supported in {CoreMode.MODE_ANN.name}, "
+                f"but got {self.mode.name}."
+            )
+
+        self.pool_max = pool_max
+
         # DO NOT modify the names of the following variables.
         # They will be exported to the parameter verification model.
         self.reset_mode = reset_mode
@@ -428,7 +435,7 @@ def __init__(
         input_width: Union[L[1, 8], InputWidthFormat] = InputWidthFormat.WIDTH_1BIT,
         spike_width: Union[L[1, 8], SpikeWidthFormat] = SpikeWidthFormat.WIDTH_1BIT,
         snn_en: Union[bool, SNNModeEnable] = True,
-        pool_max: bool = False,
+        pool_max: Union[bool, MaxPoolingEnable] = False,
         unrolling_factor: int = 1,
         overflow_strict: bool = False,
         keep_shape: bool = True,
@@ -505,12 +512,13 @@ def update(
             return None
 
         if x is None:
-            if not self.pool_max:
-                x = self.sum_inputs()
-            else:
+            if self.pool_max:
                 x = self.max_inputs()
+            else:
+                x = self.sum_inputs()
         else:
             x = np.atleast_1d(x)
+
         self._neu_out, self._vjt = super().update(x, self._vjt)
 
         idx = (self.timestamp + self.delay_relative - 1) % HwConfig.N_TIMESLOT_MAX
diff --git a/paibox/mixin.py b/paibox/mixin.py
index ea056c71..3044f73c 100644
--- a/paibox/mixin.py
+++ b/paibox/mixin.py
@@ -160,7 +160,7 @@ def max_inputs(self, *args, **kwargs) -> VoltageType:
             if output is None:
                 output = node.output.copy()
             else:
-                output = np.maximum(output, node.output.copy())
+                output = np.maximum(output, node.output)
 
         return np.asarray(output, dtype=VOLTAGE_DTYPE)
 

From d2cc15556867dce64eb87cd04450ff51cc44231d Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 3 Sep 2024 15:57:07 +0800
Subject: [PATCH 065/187] =?UTF-8?q?=E2=9C=85=20add=20test=20for=20`max=5Fi?=
 =?UTF-8?q?nputs`=20of=20neuron?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/neuron/test_neurons.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/components/neuron/test_neurons.py b/tests/components/neuron/test_neurons.py
index 901e3732..451742f2 100644
--- a/tests/components/neuron/test_neurons.py
+++ b/tests/components/neuron/test_neurons.py
@@ -558,6 +558,23 @@ def test_sum_inputs_behavior(self, build_Net2):
             sim.run(1)
             assert np.array_equal(sim.data[net.probe2][i], _always_spike)
 
+    def test_max_inputs_behavior(self):
+        """Only check the voltage result after the `max_inputs` of neuron."""
+        incoming_v1 = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32)
+        incoming_v2 = np.array([-1, 7, -3, 8, -5, -6, 1, 2], dtype=np.int32)
+        incoming_v3 = np.array([2, 3, 1, -8, 0, 8, 4, 7], dtype=np.int32)
+        incoming_v = [incoming_v1, incoming_v2, incoming_v3]
+
+        v_poolmax = np.zeros_like(incoming_v1)
+        for v in incoming_v:
+            if v_poolmax is None:
+                v_poolmax = v.copy()
+            else:
+                v_poolmax = np.maximum(v_poolmax, v)
+
+        assert v_poolmax.shape == incoming_v1.shape
+        assert np.array_equal(v_poolmax, np.array([2, 7, 3, 8, 5, 8, 7, 8]))
+
     def test_tick_attr_behavior(self, monkeypatch, build_Net3):
         net = build_Net3
         sim = pb.Simulator(net)

From 95b84f630fd8e861b9cfc680f7f067ae11bd0714 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 4 Sep 2024 20:24:31 +0800
Subject: [PATCH 066/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix:=20when=20the=20`?=
 =?UTF-8?q?NeuModule`=20to=20be=20built=20is=20in=20the=20node=20container?=
 =?UTF-8?q?s,=20remove=20it=20from=20the=20containers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/network.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/paibox/network.py b/paibox/network.py
index 843ff443..83f231f5 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -7,7 +7,7 @@
 from .components import NeuModule, Neuron, Projection
 from .components.modules import BuiltComponentType
 from .mixin import Container
-from .node import NodeDict
+from .node import NodeDict, NodeList
 
 
 if sys.version_info >= (3, 10):
@@ -88,6 +88,9 @@ def build_fmodule(
 
         generated = dict()
         modules = network.nodes().subset(NeuModule).unique()
+
+        network._remove_modules_from_containers(network, modules)
+
         delay = 1
         for module in modules.values():
             if isinstance(module, Conv2dSemiFolded):
@@ -133,6 +136,23 @@ def _ignore_components(self, *components: DynamicSys) -> None:
             if cpn in self.__dict__.values():
                 cpn.__gh_build_ignore__ = True
 
+    @staticmethod
+    def _remove_modules_from_containers(
+        network: "DynSysGroup", modules: Collector[str, NeuModule]
+    ) -> None:
+        """Remove the built modules from the node containers of the network."""
+        node_lists = [v for v in network.__dict__.values() if isinstance(v, NodeList)]
+        node_dicts = [v for v in network.__dict__.values() if isinstance(v, NodeDict)]
+
+        for module in modules.values():
+            for lst in node_lists:
+                if module in lst:
+                    lst.remove(module)
+
+            for dct in node_dicts:
+                for key in [k for k, v in dct.items() if v is module]:  # pop by key
+                    dct.pop(key)
+
     @property
     def components(self) -> Collector[str, DynamicSys]:
         """Recursively search for all components within the network."""

From 52389e5e1627a603bbdd829d374224129a501390 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 5 Sep 2024 14:59:31 +0800
Subject: [PATCH 067/187] =?UTF-8?q?=E2=9C=A8=20store=20`valid=5Finterval`?=
 =?UTF-8?q?=20for=20modules=20with=20semi-folded=20interface?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py   |  4 +++-
 paibox/components/functional.py | 19 ++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 870bc3a3..1cda1a81 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -157,8 +157,10 @@ class _DelayChainANN(_DelayChainBase):
 class _HasSemiFoldedIntf(Protocol):
     """The front of this module has replication & delay interface for semi-folded convolution."""
 
+    valid_interval: int = 1
+
     def build(
-        self, network: DynSysGroup, delay: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType: ...
 
 
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 51e99b92..76c292b9 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -867,9 +867,11 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, delay: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
+        
+        self.valid_interval = valid_interval
 
         delay_shape = self.module_intf.operands[0].shape_out
         n_delays = NodeList()
@@ -890,7 +892,7 @@ def build(
         for i in range(delay_shape[1]):
             neuron = ANNBypassNeuron(
                 shape=delay_shape,
-                delay=delay * i + 1,
+                delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
                 keep_shape=self.keep_shape,
@@ -968,17 +970,19 @@ def __init__(
         if in_ch != cin:
             raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
 
-        _shape_out = (cout, out_h)
-
         super().__init__(
-            neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
+            neuron_s,
+            shape_out=(cout, out_h),
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, delay: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -986,6 +990,7 @@ def build(
         #         self.module_intf.operands[0].shape_out, "CHW"
         #     )
         #     self.module_intf.operands[0].shape_change((in_ch, in_h))
+        self.valid_interval = valid_interval
 
         _, in_h = self.module_intf.operands[0].shape_out
         _, cin, _, kw = self.kernel.shape
@@ -1008,7 +1013,7 @@ def build(
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, in_h),
-                delay=delay * i + 1,
+                delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
                 keep_shape=self.keep_shape,

From b2e5ddbc7c726cebe2e11c6a73b8482a3817db61 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 5 Sep 2024 15:08:36 +0800
Subject: [PATCH 068/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20network?=
 =?UTF-8?q?s=20with=20N*semi=20conv2d=20+=20linear=20structure?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 279 +++++++++++++++-------------
 tests/shared_networks.py            |  58 +++---
 2 files changed, 176 insertions(+), 161 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 5358ee93..9111384d 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -28,11 +28,17 @@ def _assert_build_fmodule(
     # Construct the functional modules
     DynSysGroup.build_fmodule(network)
 
-    # Must exclude `NeuModule`s, because it may be in the probe's `__dict__`.
+    # Must exclude `NeuModule`, because it may be in the `__dict__` of probe
     nodes = network.nodes().subset(DynamicSys).exclude(NeuModule).unique()
     assert len(nodes) == n_node_aft_build
 
 
+def _ann_bit_trunc(v_array: VoltageType, bit_trunc: int = 8) -> NeuOutType:
+    return np.where(v_array <= 0, 0, MetaNeuron._truncate(v_array, bit_trunc)).astype(
+        NEUOUT_U8_DTYPE
+    )
+
+
 N_TEST = 20
 
 
@@ -819,158 +825,181 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.export(fp=ensure_dump_dir)
 
     @pytest.mark.parametrize(
-        "ishape_chw, kshape_oihw, stride, padding, bias",
+        "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features",
         [
-            ((3, 11, 11), (6, 3, 3, 3), 1, 0, 0),
-            ((12, 24, 24), (24, 12, 3, 3), 1, 0, 0),
-            ((16, 32, 32), (24, 16, 4, 4), 1, 0, 0),
-            ((12, 24, 24), (4, 12, 3, 3), 2, 0, 0),
-            ((12, 32, 32), (32, 12, 3, 3), 3, 0, 0),
-            ((12, 28, 28), (4, 12, 3, 3), 2, 0, 0),
-        ],
-    )
-    def test_Conv2dSemiFolded_1Layer(
-        self, ishape_chw, kshape_oihw, stride, padding, bias, random_fixture
-    ):
-        from tests.shared_networks import Conv2dSemiFolded_1Layer
-
-        kernel = np.random.randint(-3, 4, size=kshape_oihw, dtype=np.int8)
-        _stride = _pair(stride)
-        _padding = _pair(padding)
-        ow = (ishape_chw[-1] + 2 * _padding[1] - kshape_oihw[-1]) // _stride[1] + 1
-
-        net1 = Conv2dSemiFolded_1Layer(ishape_chw[:2], kernel, _stride, _padding, bias)
-        conv2d = net1.conv1
-        generated = DynSysGroup.build_fmodule(net1)
-        sim1 = pb.Simulator(net1, start_time_zero=False)
-
-        probe_conv = pb.Probe(generated[conv2d][0], "output")
-        sim1.add_probe(probe_conv)
-
-        n_time = 3
-        for _ in range(n_time):
-            sim1.reset()
-            inpa = np.random.randint(0, 3, size=ishape_chw, dtype=np.uint8)
-            inp_pad0 = np.concatenate(
-                [inpa, np.zeros_like(inpa)], axis=2, dtype=np.uint8
-            )
-
-            for i in range(inp_pad0.shape[-1]):
-                pb.FRONTEND_ENV.save(data1=inp_pad0[:, :, i])
-                sim1.run(1)
-
-            expected = _conv2d_faster_fp32(inpa, kernel, _stride, _padding).astype(
-                np.int32
-            )
-
-            # Truncated expected convolution result
-            expected_t = np.where(
-                expected <= 0,
-                0,
-                np.where((expected >> 8) > 0, np.uint8(255), expected & np.uint8(255)),
-            ).astype(np.uint8)
-
-            # Valid result at [kw : kw+ow]
-            for i in range(ow):
-                assert np.array_equal(
-                    expected_t[:, :, i].ravel(),
-                    sim1.data[probe_conv][
-                        generated[conv2d][0].tick_wait_start
-                        + (kshape_oihw[-1] - 1)
-                        - 1
-                        + i * _stride[1]
-                    ],
-                )
-
-    @pytest.mark.parametrize(
-        "ishape_chw, kshape_oihw, stride, padding, out_features",
-        [
-            ((3, 12, 12), (12, 3, 3, 3), 1, 0, (10,)),
-            ((8, 12, 12), (16, 8, 3, 3), 1, 0, (10,)),
-            ((4, 12, 12), (8, 4, 3, 3), 1, 0, (4, 2)),
-            ((4, 24, 24), (8, 4, 3, 3), 2, 0, 10),
-            ((12, 12, 12), (6, 12, 3, 3), 1, 0, (3, 3)),
-            ((4, 24, 24), (8, 4, 4, 4), 2, 0, (10,)),  # corner case
+            # n_conv = 1
+            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], [0], (10,)),
+            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], [0], (10,)),
+            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], [0], (10,)),
+            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], [0], (4, 2)),
+            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], [0], 10),
+            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], [0], (3, 3)),
+            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], [0], (10,)),
+            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10),
+            # n_conv = 2
+            (
+                (4, 32, 32),
+                2,
+                [(8, 4, 3, 3), (12, 8, 4, 4)],
+                [(2, 2), (2, 2)],
+                [0, 0],
+                10,
+            ),
+            (
+                (4, 32, 32),
+                2,
+                [(8, 4, 3, 3), (12, 8, 4, 4)],
+                [(2, 2), (1, 1)],
+                [0, 0],
+                10,
+            ),
+            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [0, 0], 10),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [0, 0], 10),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], [0, 0], 10),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], [0, 0], 10),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [2, 2], [0, 0], 10),
+            # n_conv = 3
+            (
+                (4, 32, 32),
+                3,
+                [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
+                [2, 1, 1],
+                [0, 0, 0],
+                3,
+            ),
+            (
+                (3, 32, 32),
+                3,
+                [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
+                [1, 1, 1],
+                [0, 0, 0],
+                10,
+            ),
         ],
     )
-    def test_Conv2dSemiFolded_FC_Net1(
-        self, ishape_chw, kshape_oihw, stride, padding, out_features, random_fixture
+    def test_Conv2dSemiFolded_FC_ChainNet(
+        self,
+        ishape_chw,
+        n_conv,
+        kshape_oihw,
+        stride,
+        padding,
+        out_features,
+        random_fixture,
     ):
-        from tests.shared_networks import Conv2dSemiFolded_FC_Net1
+        """Test the network with N semi-folded conv2d + 1 semi-folded linear."""
+        from tests.shared_networks import Conv2dSemiFolded_FC_ChainNetN
+
+        assert n_conv == len(kshape_oihw) == len(stride) == len(padding)
+        kernels = []
+        strides = []
+        paddings = []
+        ocs = []
+        ohs = []
+        ows = []
+
+        for i_conv in range(n_conv):
+            kshape, s, p = kshape_oihw[i_conv], stride[i_conv], padding[i_conv]
+
+            k = np.random.randint(-3, 4, size=kshape, dtype=WEIGHT_DTYPE)
+            _stride = _pair(s)
+            _padding = _pair(p)
+            kernels.append(k)
+            strides.append(_stride)
+            paddings.append(_padding)
+
+            ih = ishape_chw[1] if i_conv == 0 else ohs[-1]
+            iw = ishape_chw[2] if i_conv == 0 else ows[-1]
+            oc = kshape[0]
+            oh = (ih + 2 * _padding[0] - kshape[2]) // _stride[0] + 1
+            ow = (iw + 2 * _padding[1] - kshape[3]) // _stride[1] + 1
+            ocs.append(oc)
+            ohs.append(oh)
+            ows.append(ow)
 
-        kernel = np.random.randint(-3, 4, size=kshape_oihw, dtype=np.int8)
-        _stride = _pair(stride)
-        _padding = _pair(padding)
-        oc = kshape_oihw[0]
-        oh = (ishape_chw[1] + 2 * _padding[0] - kshape_oihw[2]) // _stride[0] + 1
-        ow = (ishape_chw[2] + 2 * _padding[1] - kshape_oihw[3]) // _stride[1] + 1
         fc_weight = np.random.randint(
-            -4, 5, size=(oc * oh * ow, shape2num(out_features)), dtype=np.int8
+            -4,
+            5,
+            size=(ocs[-1] * ohs[-1] * ows[-1], shape2num(out_features)),
+            dtype=WEIGHT_DTYPE,
         )
 
-        net1 = Conv2dSemiFolded_FC_Net1(
-            ishape_chw[:2], kernel, stride, padding, out_features, fc_weight
+        net2 = Conv2dSemiFolded_FC_ChainNetN(
+            ishape_chw[:2], kernels, strides, paddings, out_features, fc_weight
         )
-        conv2d = net1.conv1
-        linear = net1.linear1
-        generated = DynSysGroup.build_fmodule(net1)
-        sim1 = pb.Simulator(net1, start_time_zero=False)
+        # `conv_list` will be removed in `build_fmodule`
+        conv2d_list = net2.conv_list.copy()
+        linear = net2.linear1
+        generated = DynSysGroup.build_fmodule(net2)
+        sim1 = pb.Simulator(net2, start_time_zero=False)
+
+        probe_conv_list = []
+        for conv2d in conv2d_list:
+            probe = pb.Probe(generated[conv2d][0], "output")
+            probe_conv_list.append(probe)
+            sim1.add_probe(probe)
 
-        probe_conv = pb.Probe(generated[conv2d][0], "output")
         probe_linear = pb.Probe(generated[linear][0], "output")
-        sim1.add_probe(probe_conv)
         sim1.add_probe(probe_linear)
 
-        n_time = 3
-        for _ in range(n_time):
+        semi_folded_modules = [*conv2d_list, linear]
+        semi_valid_interval = []
+        for m in semi_folded_modules:
+            semi_valid_interval.append(m.valid_interval)
+
+        ts_1st_valid = [0] * n_conv
+        for i in range(n_conv):
+            if i == 0:
+                ts_1st_valid[i] = kshape_oihw[0][-1] * semi_valid_interval[0]
+            else:
+                ts_1st_valid[i] = (
+                    ts_1st_valid[i - 1]
+                    + (kshape_oihw[i][-1] - 1) * semi_valid_interval[i]
+                )
+
+        n_test = 3  # can be more
+        for _ in range(n_test):
             sim1.reset()
-            inpa = np.random.randint(0, 3, size=ishape_chw, dtype=np.uint8)
+            inpa = np.random.randint(0, 3, size=ishape_chw, dtype=VOLTAGE_DTYPE)
             inp_pad0 = np.concatenate(
-                [inpa, np.zeros_like(inpa)], axis=2, dtype=np.uint8
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
             )
 
             for i in range(inp_pad0.shape[-1]):
                 pb.FRONTEND_ENV.save(data1=inp_pad0[:, :, i])
                 sim1.run(1)
 
-            expected = _conv2d_faster_fp32(inpa, kernel, _stride, _padding).astype(
-                np.int32
-            )
-            # Truncated expected convolution result
-            expected_t = np.where(
-                expected <= 0,
-                0,
-                np.where((expected >> 8) > 0, np.uint8(255), expected & np.uint8(255)),
-            ).astype(np.uint8)
-
-            # Check the result of semi-folded convolution.
-            # Valid result at [kw : kw+ow]
-            for i in range(ow):
-                assert np.array_equal(
-                    expected_t[:, :, i].ravel(),
-                    sim1.data[probe_conv][
-                        generated[conv2d][0].tick_wait_start
-                        + (kshape_oihw[-1] - 1)
-                        - 1
-                        + i * _stride[1]
-                    ],
+            x = inpa
+            for i_conv in range(n_conv):
+                x = _ann_bit_trunc(
+                    _conv2d_faster_fp32(
+                        x, kernels[i_conv], strides[i_conv], paddings[i_conv]
+                    ).astype(VOLTAGE_DTYPE)
                 )
 
-            expected_fc = expected_t.ravel() @ fc_weight
-            # Truncated expected linear result
-            expected_fc_t = np.where(
-                expected_fc <= 0,
-                0,
-                np.where(
-                    (expected_fc >> 8) > 0, np.uint8(255), expected_fc & np.uint8(255)
-                ),
-            ).astype(np.uint8)
+                # Check the result of semi-folded convolutions.
+                for i in range(ow):
+                    assert np.array_equal(
+                        x[:, :, i].ravel(),
+                        sim1.data[probe_conv_list[i_conv]][
+                            conv2d_list[i_conv].tick_wait_start
+                            + ts_1st_valid[i_conv]
+                            + i * semi_valid_interval[i_conv + 1]
+                            - 1
+                        ],
+                    )
+
+            # x is the reference result of the last convolution.
+            expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
             # Check the result of semi-folded linear.
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    generated[linear][0].tick_wait_start + (ow - 1) * _stride[1] + 1
+                    linear.tick_wait_start
+                    + ts_1st_valid[-1]
+                    + (ows[-1] - 1) * semi_valid_interval[-1]
+                    - 1
                 ],
             )
 
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 8cd7ad5c..2efd6197 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -4,6 +4,7 @@
 import pytest
 
 import paibox as pb
+from paibox.node import NodeList
 
 
 def _out_bypass1(t, data1, *args, **kwargs):
@@ -253,53 +254,38 @@ def __init__(self, shape, axes):
         self.probe2 = pb.Probe(self.n2, "spike")
 
 
-class Conv2dSemiFolded_1Layer(pb.DynSysGroup):
-    def __init__(self, shape, kernel, stride, padding, bias):
+class Conv2dSemiFolded_FC_ChainNetN(pb.DynSysGroup):
+    def __init__(self, shape, kernels, strides, paddings, out_features, weight):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiFolded(
-            self.i1, kernel, stride[0], padding[0], bias=bias, tick_wait_start=1
-        )
-        
-
-class Conv2dSemiFolded_FC_Net1(pb.DynSysGroup):
-    def __init__(self, shape, kernel, stride, padding, out_features, weight):
-        super().__init__()
+        self.conv_list = NodeList()
+
+        for i, (kernel, stride, padding) in enumerate(zip(kernels, strides, paddings)):
+            self.conv_list.append(
+                pb.Conv2dSemiFolded(
+                    self.conv_list[-1] if i > 0 else self.i1,
+                    kernel,
+                    stride,
+                    padding,
+                    tick_wait_start=1 + 2 * i,
+                )
+            )
 
-        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiFolded(
-            self.i1, kernel, stride, padding, tick_wait_start=1
-        )
         self.linear1 = pb.LinearSemiFolded(
-            self.conv1,
+            self.conv_list[-1],
             out_features,
-            weights=weight,
+            weight,
             bias=0,
             conn_type=pb.SynConnType.All2All,
-            tick_wait_start=self.conv1.tick_wait_start + 2,
+            tick_wait_start=self.conv_list[-1].tick_wait_start + 2,
         )
 
 
-class Conv2dSemiFolded_FC_Net2(pb.DynSysGroup):
-    def __init__(self, shape, kernel, stride, padding, out_features, weight):
-        super().__init__()
-
-        self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        self.conv1 = pb.Conv2dSemiFolded(
-            self.i1, kernel, stride[0], padding[0], tick_wait_start=1
-        )
-        self.conv2 = pb.Conv2dSemiFolded(
-            self.conv1, kernel, stride[1], padding[1], tick_wait_start=3
-        )
-        self.linear1 = pb.LinearSemiFolded(
-            self.conv2,
-            out_features,
-            weights=weight,
-            bias=0,
-            conn_type=pb.SynConnType.All2All,
-            tick_wait_start=5,
-        )
+_pool_semi_op = {
+    "avg": pb.AvgPool2dSemiFolded,
+    "max": pb.MaxPool2dSemiFolded,
+}
 
 
 class Pool2dSemiMap_Net(pb.DynSysGroup):

From c894953a98de451b84ae75d2e8c5046fae414ae8 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 16:54:59 +0800
Subject: [PATCH 069/187] =?UTF-8?q?=E2=9C=A8=20remove=20old=20random=20fix?=
 =?UTF-8?q?ture,=20use=20`fixed=5Frng`=20instead.=20Update=20the=20guide?=
 =?UTF-8?q?=20of=20test.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/Guide-of-Test.md           | 28 +++++++---------------------
 tests/backend/test_placement.py | 10 +++++-----
 tests/conftest.py               |  5 ++---
 tests/simulator/test_encoder.py |  4 ++--
 tests/utils.py                  | 15 ++-------------
 5 files changed, 18 insertions(+), 44 deletions(-)

diff --git a/docs/Guide-of-Test.md b/docs/Guide-of-Test.md
index b48a50f2..6eb3724e 100644
--- a/docs/Guide-of-Test.md
+++ b/docs/Guide-of-Test.md
@@ -123,29 +123,15 @@ pytest = "^8.0.0"
            func2(...)
    ```
 
-6. 可复现的随机测试上下文。该夹具将为测试项目设置随机数种子，确保每次测试中，该测试项内的随机数均相同。
+6. 固定种子的随机数生成器。该夹具返回一个固定的随机数生成器，通过该生成器生成的随机数可复现。
 
-   ```python
-   @pytest.fixture
-   def random_fixture():
-       with fixed_random_seed(42):
-           yield
-
-   def test_foo(random_fixture):
-       ...
-   ```
-
-   或者，亦可对测试项目的**部分代码**设置固定的随机数种子，使用上下文环境 `with`
-
-   ```python
-   from .utils import fixed_random_seed
-   import numpy as np
-
-   def test_case():
-       with fixed_random_seed(999):
-           rd1 = np.random.randn() # Reproducible
+    ```python
+    @pytest.fixture
+    def fixed_rng() -> np.random.Generator:
+        return np.random.default_rng(42)
 
-       rd2 = np.random.randn() # Not reproducible
+    def test_foo(fixed_rng):
+        fixed_rng.random(...)
    ```
 
 ## 更多
diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 755f209c..3d67dd4f 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -23,9 +23,9 @@ def packbits_ref(bits: np.ndarray, count: int) -> int:
     return result
 
 
-def test_get_raw_weight_ref(random_fixture):
-    w1 = np.random.randint(-128, 128, size=(10, 20), dtype=WEIGHT_DTYPE)
-    w2 = np.random.randint(-128, 128, size=(10, 30), dtype=WEIGHT_DTYPE)
+def test_get_raw_weight_ref(fixed_rng: np.random.Generator):
+    w1 = fixed_rng.integers(-128, 128, size=(10, 20), dtype=WEIGHT_DTYPE)
+    w2 = fixed_rng.integers(-128, 128, size=(10, 30), dtype=WEIGHT_DTYPE)
 
     w_of_neurons = [w1, w2]
 
@@ -353,14 +353,14 @@ def test_CorePlacement_weight_pack_shape(self):
             (HwConfig.ADDR_AXON_MAX + 1) // (WRAM_PACKED_DTYPE(1).nbytes * 8),
         )
 
-    def test_packbits_to_mapping_form(self, random_fixture):
+    def test_packbits_to_mapping_form(self, fixed_rng: np.random.Generator):
         def _weight_ram_T(weight_ram_mapped: np.ndarray):
             _w = weight_ram_mapped.T.reshape(-1, 64)
             w_packed_u8 = np.packbits(_w, axis=-1, bitorder="little")
 
             return w_packed_u8
 
-        w = np.random.randint(-8, 8, size=(1152, 64), dtype=WEIGHT_DTYPE)
+        w = fixed_rng.integers(-8, 8, size=(1152, 64), dtype=WEIGHT_DTYPE)
 
         # 1152 * 512
         w1 = self._weight_ram_mapping_ref(w, 8, False, 0)
diff --git a/tests/conftest.py b/tests/conftest.py
index 485c7fdc..0f4f077e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -82,9 +82,8 @@ def perf_fixture(request):
 
 
 @pytest.fixture
-def random_fixture():
-    with fixed_random_seed(42):
-        yield
+def fixed_rng() -> np.random.Generator:
+    return np.random.default_rng(42)
 
 
 class ParametrizedTestData(TypedDict):
diff --git a/tests/simulator/test_encoder.py b/tests/simulator/test_encoder.py
index 259e8ccb..62302eee 100644
--- a/tests/simulator/test_encoder.py
+++ b/tests/simulator/test_encoder.py
@@ -36,9 +36,9 @@ def test_LatencyEncoder(self):
             out_spike2[t] = le2(x)
         assert 1
 
-    def test_PoissonEncoder(self, random_fixture):
+    def test_PoissonEncoder(self, fixed_rng: np.random.Generator):
         seed = 1
-        x = np.random.rand(10, 10).astype(np.float32)
+        x = fixed_rng.random(size=(10, 10), dtype=np.float32)
         pe = pb.simulator.PoissonEncoder(seed=seed)
         out_spike = np.full((20, 10, 10), 0)
         for t in range(20):
diff --git a/tests/utils.py b/tests/utils.py
index 0b6b4cda..1645d446 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,10 +1,9 @@
 import time
 from contextlib import contextmanager
-from typing import Any, Generator, Optional
+from typing import Any, Generator
 
-import numpy as np
 
-__all__ = ["measure_time", "fixed_random_seed"]
+__all__ = ["measure_time"]
 
 
 @contextmanager
@@ -16,13 +15,3 @@ def measure_time(desc: str) -> Generator[None, Any, None]:
         end_time = time.time()
         elapsed = end_time - start_time
         print(f"{desc} executed in: {elapsed:.2f} secs")
-
-
-@contextmanager
-def fixed_random_seed(seed: Optional[int] = None) -> Generator[None, Any, None]:
-    state = np.random.get_state()
-    np.random.seed(seed)
-    try:
-        yield
-    finally:
-        np.random.set_state(state)

From fde96c1b31e485b1370d9df7c7006d1f8067bbfd Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 17:35:34 +0800
Subject: [PATCH 070/187] =?UTF-8?q?=F0=9F=94=A7=20`max/avgpool2d=5Fgolden`?=
 =?UTF-8?q?=20is=20compatible=20with=20the=20output=20of=20the=20`SynOutTy?=
 =?UTF-8?q?pe`=20type=20to=20calculate=20the=20reference=20output=20in=20A?=
 =?UTF-8?q?NN=20mode?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/utils.py | 156 ++++++++++++++++++--------------------
 1 file changed, 72 insertions(+), 84 deletions(-)

diff --git a/tests/components/utils.py b/tests/components/utils.py
index 3e399444..cc7ac71a 100644
--- a/tests/components/utils.py
+++ b/tests/components/utils.py
@@ -1,11 +1,19 @@
-from typing import Optional
+from typing import Optional, Union, overload
 
 import numpy as np
 
-from paibox.types import WEIGHT_DTYPE, NeuOutType, SpikeType
+from paibox.types import (
+    NEUOUT_U8_DTYPE,
+    SPIKE_DTYPE,
+    VOLTAGE_DTYPE,
+    WEIGHT_DTYPE,
+    NeuOutType,
+    SpikeType,
+    SynOutType,
+)
 
 
-def _conv1d_golden(
+def conv1d_golden(
     x: np.ndarray,
     out_shape: tuple[int],
     kernel: np.ndarray,
@@ -40,7 +48,7 @@ def _conv1d_golden(
     return out
 
 
-def _conv2d_golden(
+def conv2d_golden(
     x: np.ndarray,
     out_shape: tuple[int, int],
     kernel: np.ndarray,
@@ -88,7 +96,7 @@ def maxpool1d_golden(
     kernel_size: tuple[int],
     stride: Optional[tuple[int]],
     padding: tuple[int],
-    fm_order: str,
+    fm_order: str = "CL",
 ) -> SpikeType:
     if fm_order == "LC":
         _x = x.T
@@ -115,13 +123,33 @@ def maxpool1d_golden(
     return out
 
 
+@overload
 def maxpool2d_golden(
     x: SpikeType,
     kernel_size: tuple[int, int],
     stride: Optional[tuple[int, int]],
     padding: tuple[int, int],
-    fm_order: str,
-) -> SpikeType:
+    fm_order: str = "CHW",
+) -> SpikeType: ...
+
+
+@overload
+def maxpool2d_golden(
+    x: NeuOutType,
+    kernel_size: tuple[int, int],
+    stride: Optional[tuple[int, int]],
+    padding: tuple[int, int],
+    fm_order: str = "CHW",
+) -> SynOutType: ...
+
+
+def maxpool2d_golden(
+    x: Union[NeuOutType, SpikeType],
+    kernel_size: tuple[int, int],
+    stride: Optional[tuple[int, int]],
+    padding: tuple[int, int],
+    fm_order: str = "CHW",
+) -> Union[SynOutType, SpikeType]:
     if fm_order == "HWC":
         _x = x.transpose(2, 0, 1)
     else:
@@ -134,7 +162,12 @@ def maxpool2d_golden(
     ow = (iw - kw + 2 * padding[1]) // _stride[1] + 1
     cout = xcin
 
-    out = np.zeros((cout, oh, ow), dtype=x.dtype)
+    if x.dtype == NEUOUT_U8_DTYPE:
+        # Treat the result as voltage since it will be truncated later.
+        out = np.zeros((cout, oh, ow), dtype=VOLTAGE_DTYPE)
+    else:
+        out = np.zeros((cout, oh, ow), dtype=SPIKE_DTYPE)
+
     x_padded = np.pad(
         _x,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
@@ -160,8 +193,8 @@ def avgpool1d_golden(
     kernel_size: tuple[int],
     stride: Optional[tuple[int]],
     padding: tuple[int],
-    fm_order: str,
     threshold: int,
+    fm_order: str = "CL",
 ) -> SpikeType:
     if fm_order == "LC":
         _x = x.T
@@ -188,14 +221,36 @@ def avgpool1d_golden(
     return out >= threshold
 
 
+@overload
 def avgpool2d_golden(
     x: SpikeType,
     kernel_size: tuple[int, int],
     stride: Optional[tuple[int, int]],
     padding: tuple[int, int],
-    fm_order: str,
     threshold: int,
-) -> SpikeType:
+    fm_order: str = "CHW",
+) -> SpikeType: ...
+
+
+@overload
+def avgpool2d_golden(
+    x: NeuOutType,
+    kernel_size: tuple[int, int],
+    stride: Optional[tuple[int, int]],
+    padding: tuple[int, int],
+    threshold: None = None,
+    fm_order: str = "CHW",
+) -> SynOutType: ...
+
+
+def avgpool2d_golden(
+    x: Union[NeuOutType, SpikeType],
+    kernel_size: tuple[int, int],
+    stride: Optional[tuple[int, int]],
+    padding: tuple[int, int],
+    threshold: Optional[int] = None,
+    fm_order: str = "CHW",
+) -> Union[SynOutType, SpikeType]:
     if fm_order == "HWC":
         _x = x.transpose(2, 0, 1)
     else:
@@ -208,7 +263,8 @@ def avgpool2d_golden(
     ow = (iw - kw + 2 * padding[1]) // _stride[1] + 1
     cout = xcin
 
-    out = np.zeros((cout, oh, ow), dtype=WEIGHT_DTYPE)
+    # Treat the result as voltage since it will be truncated or compared later.
+    out = np.zeros((cout, oh, ow), dtype=VOLTAGE_DTYPE)
     x_padded = np.pad(
         _x,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
@@ -226,75 +282,7 @@ def avgpool2d_golden(
                     ]
                 )
 
-    return out >= threshold
-
-
-def max_pooling(
-    input_data,
-    kernel_size: tuple[int, int],
-    stride: tuple[int, int],
-) -> NeuOutType:
-    """
-    实现最大池化层
-
-    参数:
-    input_data (numpy.ndarray): 输入数据,形状为(channels, height, width)
-    kernel_size (int): 池化核大小
-    stride (int): 步长
-
-    返回:
-    numpy.ndarray: 池化后的输出数据,形状为(channels, new_height, new_width)
-    """
-    channels, height, width = input_data.shape
-    new_height = (height - kernel_size[0]) // stride[0] + 1
-    new_width = (width - kernel_size[1]) // stride[1] + 1
-
-    output_data = np.zeros((channels, new_height, new_width))
-
-    for c in range(channels):
-        for i in range(new_height):
-            for j in range(new_width):
-                x1 = i * stride[0]
-                y1 = j * stride[1]
-                x2 = x1 + kernel_size[0]
-                y2 = y1 + kernel_size[1]
-                output_data[c, i, j] = np.max(input_data[c, x1:x2, y1:y2])
-
-    return output_data
-
-
-def avg_pooling(
-    input_data,
-    kernel_size: tuple[int, int],
-    stride: tuple[int, int],
-) -> NeuOutType:
-    """
-    实现平均池化层
-
-    参数:
-    input_data (numpy.ndarray): 输入数据,形状为(batch_size, channels, height, width)
-    kernel_size (int): 池化核大小
-    stride (int): 步长
-
-    返回:
-    numpy.ndarray: 池化后的输出数据,形状为(batch_size, channels, new_height, new_width)
-    """
-    channels, height, width = input_data.shape
-    kernel_height, kernel_width = kernel_size
-    new_height = (height - kernel_size[0]) // stride[0] + 1
-    new_width = (width - kernel_size[1]) // stride[1] + 1
-
-    output_data = np.zeros((channels, new_height, new_width), dtype=np.int32)
-
-    for c in range(channels):
-        for i in range(new_height):
-            for j in range(new_width):
-                x1 = i * stride[0]
-                y1 = j * stride[1]
-                x2 = x1 + kernel_size[0]
-                y2 = y1 + kernel_size[1]
-                output_data[c, i, j] = np.sum(input_data[c, x1:x2, y1:y2]) >> (
-                    (kernel_height * kernel_width).bit_length() - 1
-                )
-
-    return output_data
+    if threshold:
+        return out >= threshold
+    else:
+        return out >> ((kh * kw).bit_length() - 1)

From 4be21dad14734c58c21dd694185651a6b178c0d2 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:45:17 +0800
Subject: [PATCH 071/187] =?UTF-8?q?=E2=9C=A8=20add=20`=5FSemiFoldedModule`?=
 =?UTF-8?q?=20to=20indicate=20the=20modules=20with=20semi-folded=20interfa?=
 =?UTF-8?q?ce?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 1cda1a81..f98e9692 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -4,6 +4,7 @@
 from paicorelib import TM
 
 from paibox.base import NeuDyn, NodeList
+from paibox.exceptions import NotSupportedError
 from paibox.network import DynSysGroup
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -50,7 +51,7 @@
     "_SpikingPool1dWithV",
     "_SpikingPool2d",
     "_SpikingPool2dWithV",
-    "_HasSemiFoldedIntf",
+    "_SemiFoldedModule",
     "_LinearBase",
 ]
 
@@ -157,14 +158,28 @@ class _DelayChainANN(_DelayChainBase):
 class _HasSemiFoldedIntf(Protocol):
     """The front of this module has replication & delay interface for semi-folded convolution."""
 
-    valid_interval: int = 1
-
     def build(
         self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType: ...
 
 
 @set_rt_mode_ann()
+class _SemiFoldedModule(FunctionalModule, _HasSemiFoldedIntf):
+    valid_interval: int = 1
+    """The interval of valid output data"""
+
+    @staticmethod
+    def _w_padding_check(w_padding: int, prev_node: Union[NeuDyn, InputProj]) -> None:
+        # NOTE: Only support padding in the first semi-folded conv2d for now.
+        # In fact, it is rare for the H & W directions to be padded unequally.
+        # TODO Support H padding
+        if w_padding > 0 and not isinstance(prev_node, InputProj):
+            raise NotSupportedError(
+                "only semi-folded convolutions that connect input projection "
+                "are supported to have padding in the W direction."
+            )
+
+
 class _LinearBase(FunctionalModule):
     def __init__(
         self,

From 83820742d98cd9fb00d1b81b261c8c993a0143e3 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:48:09 +0800
Subject: [PATCH 072/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor=20or=20up?=
 =?UTF-8?q?date=20semi-folded=20modules=20&=20building=20processing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/functional.py | 275 +++++++++++++-------------------
 paibox/network.py               |  32 ++--
 2 files changed, 133 insertions(+), 174 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 76c292b9..e67d5bd6 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1,4 +1,3 @@
-import math
 import sys
 from collections.abc import Sequence
 from functools import partial
@@ -30,7 +29,6 @@
     FunctionalModule2to1,
     FunctionalModule2to1WithV,
     TransposeModule,
-    set_rt_mode,
     set_rt_mode_ann,
     set_rt_mode_snn,
 )
@@ -39,9 +37,9 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, FullConnSyn, Conv2dSemiFoldedSyn
+from .synapses import ConnType, FullConnSyn, Conv2dSemiFoldedSyn, MaxPool2dSemiFoldedSyn
 from .synapses.conv_types import _Size1Type, _Size2Type
-from .synapses.conv_utils import _fm_ndim2_check, _pair
+from .synapses.conv_utils import _pair
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
@@ -66,8 +64,8 @@
     "Conv2dSemiFolded",
     "Filter",
     "Linear",
-    "MaxPool2dSemiMap",
-    "AvgPool2dSemiMap",
+    "MaxPool2dSemiFolded",
+    "AvgPool2dSemiFolded",
 ]
 
 
@@ -860,7 +858,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
-class LinearSemiFolded(_LinearBase, _HasSemiFoldedIntf):
+class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
     "That operator is used on the first fully-connected layer after the semi-folded convolution."
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
@@ -870,10 +868,10 @@ def build(
         self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
-        
         self.valid_interval = valid_interval
 
-        delay_shape = self.module_intf.operands[0].shape_out
+        in_ch, in_h = self.module_intf.operands[0].shape_out
+
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
@@ -889,9 +887,9 @@ def build(
             name=f"nd_{self.name}",
         )
 
-        for i in range(delay_shape[1]):
+        for i in range(in_h):
             neuron = ANNBypassNeuron(
-                shape=delay_shape,
+                shape=(in_ch, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
@@ -903,13 +901,13 @@ def build(
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
                 neuron,
-                weights=_delay_mapping(delay_shape[1], delay_shape[0], 1),
+                weights=_delay_mapping(in_h, in_ch),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
 
-            w = self.weights[delay_shape[1] - i - 1 :: delay_shape[1], :]
+            w = self.weights[in_h - i - 1 :: in_h, :]
             syn2 = FullConnSyn(
                 neuron,
                 n_fc,
@@ -925,8 +923,7 @@ def build(
         return generated
 
 
-@set_rt_mode_ann()
-class Conv2dSemiFolded(FunctionalModule, _HasSemiFoldedIntf):
+class Conv2dSemiFolded(_SemiFoldedModule):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
@@ -950,6 +947,8 @@ def __init__(
         self.kernel = kernel
         self.stride = _pair(stride)
         self.padding = _pair(padding)
+        self._w_padding_check(self.padding[1], neuron_s)
+
         self.bit_trunc = bit_trunc
 
         if isinstance(bias, np.ndarray):
@@ -1024,7 +1023,7 @@ def build(
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
                 n_delays[i],
-                weights=_delay_mapping(in_h, cin, 1),
+                weights=_delay_mapping(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1033,10 +1032,10 @@ def build(
             syn2 = Conv2dSemiFoldedSyn(  # cin, ih -> cout * oh
                 neuron,
                 n_conv2d,
-                kernel=self.kernel[:, :, :, kw - i - 1],
-                stride=self.stride,
-                padding=self.padding,
-                order="OIL",
+                self.kernel[:, :, :, kw - i - 1],
+                self.stride,
+                self.padding,
+                "OIL",
                 name=f"s{i}_{self.name}",
             )
             s_kernel.append(syn2)
@@ -1051,9 +1050,8 @@ def build(
     "The backend currently does not support 'Filter', please use it in a future version",
     category=PAIBoxDeprecationWarning,
 )
-@set_rt_mode(8, 8, 0)
+@set_rt_mode_ann()
 class Filter(FunctionalModule):
-
     def __init__(
         self,
         neuron: Union[NeuDyn, InputProj],
@@ -1118,6 +1116,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode_ann()
 class Linear(_LinearBase):
     "Linear layer for ANN."
 
@@ -1145,125 +1144,108 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
             self.module_intf.operands[0],
             neuron_d,
             weights=self.weights,
-            conn_type=self.conn_type,
+            conn_type=ConnType.All2All,
             name=f"syn1_{self.name}",
         )
+
         generated = [neuron_d, syn1]
         self._rebuild_out_intf(network, neuron_d, *generated, **build_options)
 
         return generated
 
 
-@set_rt_mode(8, 8, 0)
-class MaxPool2dSemiMap(FunctionalModule):
+class MaxPool2dSemiFolded(_SemiFoldedModule):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
         self,
         neuron_s: Union[NeuDyn, InputProj],
-        # neuron_d: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
         # padding: _Size2Type = 0,
-        # bias: Union[int, LeakVType] = 0,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """2d Pool2d_semimap for spike."""
-        self.kernel_size = kernel_size
-        self.stride = _pair(stride)
-        self.pool_max = True
+        """2d semi-folded max pooling for ANN mode."""
+        self.kernel_size = _pair(kernel_size)
+        if stride is None:
+            _stride = self.kernel_size
+        else:
+            _stride = _pair(stride)
+
+        self.stride = _stride
         # self.padding = _pair(padding)
-        # self.bias = bias
+        # self._w_padding_check(self.padding[1], neuron_s)
 
-        if len(neuron_s.shape_out) != 2:
-            in_ch, in_h, in_w = neuron_s.shape_out
-        else:
-            (
-                in_ch,
-                in_h,
-            ) = neuron_s.shape_out
-        cout = cin = in_ch
-        out_h = (in_h - kernel_size[0]) // self.stride[0] + 1
-        if in_ch != cin:
-            raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
+        assert len(neuron_s.shape_out) == 2
+        in_ch, in_h = neuron_s.shape_out
+
+        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
 
-        _shape_out = (cout, out_h)
-        # self.tfm = Conv2dHalfForward((in_ch, in_h), (out_channels, out_h), _kernel, stride, padding)
         super().__init__(
             neuron_s,
-            # neuron_d,
-            shape_out=_shape_out,
+            shape_out=(in_ch, out_h),
             keep_shape=keep_shape,
             name=name,
             **kwargs,
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        print("进入pool2d_func")
-        return
+        raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, delay: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType:
-        # print("进入build")
-        if len(self.module_intf.operands[0].shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(
-                self.module_intf.operands[0].shape_out, "CHW"
-            )
-            self.module_intf.operands[0].shape_change((in_ch, in_h))
+        assert len(self.module_intf.operands[0].shape_out) == 2
+        # if len(self.module_intf.operands[0].shape_out) != 2:
+        #     in_ch, in_h, in_w = _fm_ndim2_check(
+        #         self.module_intf.operands[0].shape_out, "CHW"
+        #     )
+        #     self.module_intf.operands[0].shape_change((in_ch, in_h))
+        self.valid_interval = valid_interval
+
         in_ch, in_h = self.module_intf.operands[0].shape_out
-        cout = cin = in_ch
-        kh, kw = self.kernel_size
+        cin = in_ch
+        _, kw = self.kernel_size
+
         n_delays = NodeList()
         s_delays = NodeList()
-        pool2d = Neuron(
+
+        pool2d = ANNNeuron(
             self.shape_out,
-            reset_mode=RM.MODE_NONRESET,
-            neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=0,
-            neg_threshold=0,
-            pos_threshold=0,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=self.tick_wait_end,
-            input_width=self.input_width,
-            spike_width=self.spike_width,
-            snn_en=self.snn_en,
-            pool_max=self.pool_max,
+            pool_max=True,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+
         for i in range(kw):
-            neuron = Neuron(
+            neuron = ANNBypassNeuron(
                 (cin, in_h),
-                leak_v=0,
-                neg_threshold=0,
-                delay=delay * i + 1,
+                delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
-                input_width=self.input_width,
-                spike_width=self.spike_width,
-                snn_en=self.snn_en,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
             n_delays.append(neuron)
-            # 延时突触
+            # delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],  # (2, 5)
+                self.module_intf.operands[0],
                 n_delays[i],
-                weights=_delay_mapping(in_h, cin, 1),
+                weights=_delay_mapping(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
-            syn2 = MaxPool2dSemiMapSyn(
-                n_delays[i],
+            syn2 = MaxPool2dSemiFoldedSyn(
+                neuron,
                 pool2d,
-                weights=_pool2d_semimap(
-                    (cin, in_h), self.shape_out, self.kernel_size, self.stride
+                weights=_poo2d_semifolded_mapping(
+                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride
                 ),
                 name=f"s{i}_{self.name}",
             )
@@ -1275,116 +1257,100 @@ def build(
         return generated
 
 
-@set_rt_mode(8, 8, 0)
-class AvgPool2dSemiMap(FunctionalModule):
+class AvgPool2dSemiFolded(_SemiFoldedModule):
     _spatial_ndim: ClassVar[int] = 2
 
     def __init__(
         self,
         neuron_s: Union[NeuDyn, InputProj],
-        # neuron_d: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
         # padding: _Size2Type = 0,
-        # bias: Union[int, LeakVType] = 0,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
         """2d AvgPool2d_semimap for spike."""
-        self.kernel_size = kernel_size
-        self.stride = _pair(stride)
+        self.kernel_size = _pair(kernel_size)
+        if stride is None:
+            _stride = self.kernel_size
+        else:
+            _stride = _pair(stride)
+
+        self.stride = _stride
         # self.padding = _pair(padding)
-        # self.bias = bias
+        # self._w_padding_check(self.padding[1], neuron_s)
 
-        if len(neuron_s.shape_out) != 2:
-            in_ch, in_h, in_w = neuron_s.shape_out
-        else:
-            (
-                in_ch,
-                in_h,
-            ) = neuron_s.shape_out
-        cout = cin = in_ch
-        out_h = (in_h - kernel_size[0]) // self.stride[0] + 1
-        if in_ch != cin:
-            raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
+        assert len(neuron_s.shape_out) == 2
+        in_ch, in_h = neuron_s.shape_out
+
+        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
 
-        _shape_out = (cout, out_h)
-        # self.tfm = Conv2dHalfForward((in_ch, in_h), (out_channels, out_h), _kernel, stride, padding)
         super().__init__(
             neuron_s,
-            # neuron_d,
-            shape_out=_shape_out,
+            shape_out=(in_ch, out_h),
             keep_shape=keep_shape,
             name=name,
             **kwargs,
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        print("进入pool2d_func")
-        return
+        raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, delay: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, **build_options
     ) -> BuiltComponentType:
-        # print("进入build")
-        if len(self.module_intf.operands[0].shape_out) != 2:
-            in_ch, in_h, in_w = _fm_ndim2_check(
-                self.module_intf.operands[0].shape_out, "CHW"
-            )
-            self.module_intf.operands[0].shape_change((in_ch, in_h))
+        assert len(self.module_intf.operands[0].shape_out) == 2
+        # if len(self.module_intf.operands[0].shape_out) != 2:
+        #     in_ch, in_h, in_w = _fm_ndim2_check(
+        #         self.module_intf.operands[0].shape_out, "CHW"
+        #     )
+        #     self.module_intf.operands[0].shape_change((in_ch, in_h))
+        self.valid_interval = valid_interval
+
         in_ch, in_h = self.module_intf.operands[0].shape_out
-        cout = cin = in_ch
+        cin = in_ch
         kh, kw = self.kernel_size
-        bittrunc = int(math.log2(kw * kh) + 8)
+        # NOTE: Division is implemented via the bit-truncation operation, so
+        # the divisor can only be approximated by an integer power of 2.
+        bit_trunc = 8 + (kh * kw).bit_length() - 1
+
         n_delays = NodeList()
         s_delays = NodeList()
-        pool2d = Neuron(
+
+        pool2d = ANNNeuron(
             self.shape_out,
-            reset_mode=RM.MODE_NONRESET,
-            neg_thres_mode=NTM.MODE_SATURATION,
-            leak_v=0,
-            neg_threshold=0,
-            pos_threshold=0,
             delay=self.delay_relative,
-            bit_truncation=bittrunc,
+            bit_trunc=bit_trunc,
             tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=self.tick_wait_end,
-            input_width=self.input_width,
-            spike_width=self.spike_width,
-            snn_en=self.snn_en,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
         for i in range(kw):
-            neuron = Neuron(
+            neuron = ANNBypassNeuron(
                 (cin, in_h),
-                leak_v=0,
-                neg_threshold=0,
-                delay=delay * i + 1,
+                delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
-                input_width=self.input_width,
-                spike_width=self.spike_width,
-                snn_en=self.snn_en,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
             n_delays.append(neuron)
-            # 延时突触
+            # delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],  # (2, 5)
+                self.module_intf.operands[0],
                 n_delays[i],
-                weights=_delay_mapping(in_h, cin, 1),
+                weights=_delay_mapping(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
             syn2 = FullConnSyn(
-                n_delays[i],
+                neuron,
                 pool2d,
-                weights=_pool2d_semimap(
-                    (cin, in_h), self.shape_out, self.kernel_size, self.stride
+                weights=_poo2d_semifolded_mapping(
+                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride
                 ),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
@@ -1503,29 +1469,18 @@ def _transpose3d_mapping(
     return mt
 
 
-def _delay_mapping(h: int, cin: int, n: int) -> WeightType:
-    # mt = np.zeros((cin * h, cin * n * h), dtype=np.bool_)
-    # for i in range(cin):
-    #     for j in range(n * cin):
-    #         for k in range(h):
-    #             mt[i * h + k, j * h + k] = 1
-    mt = np.eye(cin * h, dtype=WEIGHT_DTYPE)
-    return mt
+def _delay_mapping(h: int, cin: int) -> WeightType:
+    return np.eye(cin * h, dtype=WEIGHT_DTYPE)
 
 
-def _pool2d_semimap(
-    in_shape: _Size2Type,
-    out_shape: _Size2Type,
-    kernel_size: WeightType,
-    stride: _Size2Type,
+def _poo2d_semifolded_mapping(
+    cin: int, ih: int, oh: int, kh: int, stride: tuple[int, int]
 ) -> WeightType:
-    cout = cin = in_shape[0]
-    kh, kw = kernel_size
-    ih = in_shape[1]
-    o_ch, oh = out_shape
-    mt = np.zeros((cin * ih, cout * oh), dtype=np.bool_)
+    cout = cin
+    m = np.zeros((cin * ih, cout * oh), dtype=WEIGHT_DTYPE)
+
     for i in range(cout):
-        for j in range(cin):
-            for k in range(oh):
-                mt[j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k] = 1
-    return mt
+        for j in range(oh):
+            m[i * ih + j * stride[1] : i * ih + j * stride[1] + kh, i * oh + j] = 1
+
+    return m
diff --git a/paibox/network.py b/paibox/network.py
index 83f231f5..d28d942f 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -80,33 +80,37 @@ def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
         from .components.functional import (
-            AvgPool2dSemiMap,
+            AvgPool2dSemiFolded,
             Conv2dSemiFolded,
             LinearSemiFolded,
-            MaxPool2dSemiMap,
+            MaxPool2dSemiFolded,
         )
 
         generated = dict()
         modules = network.nodes().subset(NeuModule).unique()
 
-        network._remove_modules_from_containers(network, modules)
+        # Valid interval for semi-folded components
+        # If the input data is input continuously on the W-axis, the initial
+        # valid interval for the first semi-folded component is 1.
+        semi_valid_interval = 1
 
-        delay = 1
         for module in modules.values():
-            if isinstance(module, Conv2dSemiFolded):
-                generated[module] = module.build(network, delay, **build_options)
-                delay *= module.stride[1]  # stride of w > 1
+            if isinstance(
+                module, (Conv2dSemiFolded, MaxPool2dSemiFolded, AvgPool2dSemiFolded)
+            ):
+                generated[module] = module.build(
+                    network, semi_valid_interval, **build_options
+                )
+                semi_valid_interval *= module.stride[1]
             elif isinstance(module, LinearSemiFolded):
-                generated[module] = module.build(network, delay, **build_options)
-            elif isinstance(module, MaxPool2dSemiMap):
-                generated[module] = module.build(network, delay, **build_options)
-                delay *= module.stride[1]
-            elif isinstance(module, AvgPool2dSemiMap):
-                generated[module] = module.build(network, delay, **build_options)
-                delay *= module.stride[1]
+                generated[module] = module.build(
+                    network, semi_valid_interval, **build_options
+                )
             else:
                 generated[module] = module.build(network, **build_options)
 
+        network._remove_modules_from_containers(network, modules)
+
         return generated
 
     def _add_components(self, *implicit: DynamicSys, **explicit: DynamicSys) -> None:

From e866fcf411131264573d2f8d52dd3c7610084930 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:49:00 +0800
Subject: [PATCH 073/187] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20simplified=20`=5Fc?=
 =?UTF-8?q?onv2d=5Fsemifolded=5Funroll`=20&=20bugfix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/synapses/conv_utils.py | 26 +++++++++++-------------
 paibox/components/synapses/transforms.py |  2 +-
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 9916410c..fd3f561f 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -197,19 +197,12 @@ def _conv2d_semifolded_unroll(
 
     for i in range(cout):
         for j in range(cin):
-            if padding[0] == 0:
-                for k in range(oh):
-                    # w_np[j*ih+padding[0]*(stride[1]-1)+k*stride[1]:j*ih+padding[1]*(stride[1]-1)+k*stride[1]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
-                    # w_np[j*ih+stride[1]*(padding[0]+k)-padding[0]:j*ih+stride[1]*(padding[0]+k)-padding[0]+kh, i*oh+k+padding[0]] = kernel[i, j, :]
-                    w_np[
-                        j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
-                    ] = kernel[i, j, :]
-            else:
-                for k in range(oh):
-                    w_np[
-                        j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
-                    ] = kernel[i, j, :]
+            for k in range(oh):
+                w_np[
+                    j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
+                ] = kernel[i, j, :]
 
+            if padding[0] > 0: # H direction
                 w_np = np.delete(
                     w_np,
                     np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
@@ -219,6 +212,11 @@ def _conv2d_semifolded_unroll(
     return w_np
 
 
+"""
+    NOTE: The faster convolutions are verified by _convNd_golden() functions in test utils.
+"""
+
+
 def _conv1d_faster(
     x_cl: NeuOutType,
     out_shape: Size1Type,
@@ -227,7 +225,7 @@ def _conv1d_faster(
     padding: Size1Type,
 ) -> SynOutType:
     """Faster 1d convolution."""
-    cout, cin, kl = kernel.shape  # (O, I, L)
+    cout, _, kl = kernel.shape  # (O, I, L)
 
     x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])), mode="constant")
 
@@ -254,7 +252,7 @@ def _conv2d_faster(
     # fm_order: str,
 ) -> SynOutType:
     """Faster 2d convolution."""
-    cout, cin, kh, kw = kernel.shape  # (O, I, H, W)
+    cout, _, kh, kw = kernel.shape  # (O, I, H, W)
 
     x_padded = np.pad(
         x_chw,
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 8a38166a..e3e25ece 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -598,7 +598,7 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         else:
             output = np.zeros((self.conn_size[1],), dtype=VOLTAGE_DTYPE)
             for col in range(self.conn_size[1]):
-                col_result = x * self.weights[:, col]
+                col_result = x * self.weights[:, col].astype(VOLTAGE_DTYPE)
                 output[col] = np.max(col_result)
 
         return output

From 2af6800de239e8c334c1743d2935787e1966c5b7 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:51:17 +0800
Subject: [PATCH 074/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20reorganize=20impor?=
 =?UTF-8?q?t?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index 88abc987..0aec89a4 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -5,16 +5,10 @@
 from .backend import Mapper as Mapper
 
 # Functional modules
-from .components.functional import AvgPool2dSemiMap as AvgPool2dSemiMap
 from .components.functional import BitwiseAND as BitwiseAND
 from .components.functional import BitwiseNOT as BitwiseNOT
 from .components.functional import BitwiseOR as BitwiseOR
 from .components.functional import BitwiseXOR as BitwiseXOR
-from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
-from .components.functional import LinearSemiFolded as LinearSemiFolded
-from .components.functional import Filter as Filter
-from .components.functional import Linear as Linear
-from .components.functional import MaxPool2dSemiMap as MaxPool2dSemiMap
 from .components.functional import SpikingAdd as SpikingAdd
 from .components.functional import SpikingAvgPool1d as SpikingAvgPool1d
 from .components.functional import SpikingAvgPool1dWithV as SpikingAvgPool1dWithV
@@ -26,6 +20,13 @@
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
 
+# Functional modules in ANN mode only
+from .components.functional import Linear as Linear
+from .components.functional import LinearSemiFolded as LinearSemiFolded
+from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
+from .components.functional import AvgPool2dSemiFolded as AvgPool2dSemiFolded
+from .components.functional import MaxPool2dSemiFolded as MaxPool2dSemiFolded
+
 # Reduced neurons
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
@@ -33,6 +34,8 @@
 from .components.neuron.neurons import SpikingRelu as SpikingRelu
 from .components.neuron.neurons import TonicSpiking as TonicSpiking
 from .components.neuron.neurons import BypassNeuron as BypassNeuron
+from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
+from .components.neuron.neurons import ANNNeuron as ANNNeuron
 
 # Input projection
 from .components.projection import InputProj as InputProj
@@ -48,7 +51,7 @@
 
 # Network
 from .network import DynSysGroup as DynSysGroup
-from .network import Network as Network
+from .network import Network  # alias for DynSysGroup
 
 # Simulation
 from .simulator import Probe as Probe

From 6d436377fd550da02351dd86e78de5ea63ac8109 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:53:20 +0800
Subject: [PATCH 075/187] =?UTF-8?q?=E2=9C=85=20use=20verified=20faster=20c?=
 =?UTF-8?q?onvolution?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/synapses/test_transforms.py |  7 +++----
 tests/components/test_functional.py          | 21 ++++++++++++--------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index b47eb099..6eecdb4b 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -2,12 +2,11 @@
 import pytest
 
 from paibox.components.synapses import transforms as tfm
+from paibox.components.synapses.conv_utils import _conv1d_faster, _conv2d_faster
 from paibox.exceptions import AutoOptimizationWarning
 from paibox.types import WEIGHT_DTYPE
 from paibox.utils import shape2num
 
-from tests.components.utils import _conv1d_golden, _conv2d_golden
-
 
 class TestTransforms:
     @pytest.mark.parametrize(
@@ -313,7 +312,7 @@ def test_Conv1dForward(
         # The result of matmul using the unrolled matrix
         y2 = xf @ f.connectivity.astype(np.int32)
 
-        expected = _conv1d_golden(x, out_shape, kernel, stride, padding)
+        expected = _conv1d_faster(x, out_shape, kernel, stride, padding)
 
         assert np.array_equal(y1, expected)
         assert np.array_equal(y2, expected.ravel())
@@ -393,7 +392,7 @@ def test_Conv2dForward(
         # The result of matmul using the unrolled matrix
         y2 = xf @ f.connectivity.astype(np.int32)
 
-        expected = _conv2d_golden(x, out_shape, kernel, stride, padding)
+        expected = _conv2d_faster(x, out_shape, kernel, stride, padding)
 
         assert np.array_equal(y1, expected)
         assert np.array_equal(y2, expected.ravel())
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 9111384d..175d9779 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -4,7 +4,8 @@
 import paibox as pb
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
-from paibox.components.synapses.conv_utils import _pair, _single
+from paibox.components.neuron.base import MetaNeuron
+from paibox.components.synapses.conv_utils import _pair, _single, _conv2d_faster
 from paibox.network import DynSysGroup
 from paibox.simulator.utils import _conv2d_faster_fp32
 from paibox.utils import as_shape, shape2num, typical_round
@@ -470,7 +471,7 @@ def test_SpikingPool1d(
         for i in range(1, N_TEST):
             if pool_type == "avg":
                 expected = avgpool1d_golden(
-                    inpa[i - 1], ksize, _stride, _padding, fm_order, _threshold
+                    inpa[i - 1], ksize, _stride, _padding, _threshold, fm_order
                 ).ravel()
             else:
                 expected = maxpool1d_golden(
@@ -483,7 +484,7 @@ def test_SpikingPool1d(
         for i in range(2, N_TEST):
             if pool_type == "avg":
                 expected = avgpool1d_golden(
-                    inpa[i - 2], ksize, _stride, _padding, fm_order, _threshold
+                    inpa[i - 2], ksize, _stride, _padding, _threshold, fm_order
                 ).ravel()
             else:
                 expected = maxpool1d_golden(
@@ -580,7 +581,7 @@ def test_SpikingPool2d(
         for i in range(1, N_TEST):
             if pool_type == "avg":
                 expected = avgpool2d_golden(
-                    inpa[i - 1], ksize, _stride, _padding, fm_order, _threshold
+                    inpa[i - 1], ksize, _stride, _padding, _threshold, fm_order
                 ).ravel()
             else:
                 expected = maxpool2d_golden(
@@ -593,7 +594,7 @@ def test_SpikingPool2d(
         for i in range(2, N_TEST):
             if pool_type == "avg":
                 expected = avgpool2d_golden(
-                    inpa[i - 2], ksize, _stride, _padding, fm_order, _threshold
+                    inpa[i - 2], ksize, _stride, _padding, _threshold, fm_order
                 ).ravel()
             else:
                 expected = maxpool2d_golden(
@@ -972,9 +973,13 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             x = inpa
             for i_conv in range(n_conv):
                 x = _ann_bit_trunc(
-                    _conv2d_faster_fp32(
-                        x, kernels[i_conv], strides[i_conv], paddings[i_conv]
-                    ).astype(VOLTAGE_DTYPE)
+                    _conv2d_faster(
+                        x,
+                        (ohs[i_conv], ows[i_conv]),
+                        kernels[i_conv],
+                        strides[i_conv],
+                        paddings[i_conv],
+                    )
                 )
 
                 # Check the result of semi-folded convolutions.

From 9b9b0d913d4b4b5a992202458d967ed9479550d4 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:56:15 +0800
Subject: [PATCH 076/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20semi-fo?=
 =?UTF-8?q?lded=20max/avg=20pooling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 267 +++++++++++++---------------
 tests/shared_networks.py            |  36 ++--
 2 files changed, 146 insertions(+), 157 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 175d9779..86ef22e1 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -7,14 +7,18 @@
 from paibox.components.neuron.base import MetaNeuron
 from paibox.components.synapses.conv_utils import _pair, _single, _conv2d_faster
 from paibox.network import DynSysGroup
-from paibox.simulator.utils import _conv2d_faster_fp32
+from paibox.types import (
+    VOLTAGE_DTYPE,
+    WEIGHT_DTYPE,
+    NeuOutType,
+    VoltageType,
+    NEUOUT_U8_DTYPE,
+)
 from paibox.utils import as_shape, shape2num, typical_round
 
 from .utils import (
-    avg_pooling,
     avgpool1d_golden,
     avgpool2d_golden,
-    max_pooling,
     maxpool1d_golden,
     maxpool2d_golden,
 )
@@ -1008,159 +1012,144 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                 ],
             )
 
-    @pytest.mark.skip(reason="not implemented yet")
     @pytest.mark.parametrize(
-        "shape, kernel, stride, padding, out_features, weight",
+        "ishape_chw, n_pool, kshape_hw, stride, out_features, pool_type",
         [
-            (
-                (1, 11),
-                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-                [1, 1],
-                [0, 0],
-                10,
-                np.random.randint(-5, 5, size=(7 * 7, 10), dtype=np.int8),
-            ),
-            (
-                (1, 11),
-                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-                [1, 2],
-                [0, 0],
-                10,
-                np.random.randint(-5, 5, size=(4 * 4, 10), dtype=np.int8),
-            ),
-            (
-                (1, 11),
-                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-                [2, 1],
-                [0, 0],
-                10,
-                np.random.randint(-5, 5, size=(3 * 3, 10), dtype=np.int8),
-            ),
-            (
-                (1, 11),
-                np.array([[[[2, 1, 2], [1, -2, 1], [-1, 2, -3]]]], dtype=np.int8),
-                [2, 2],
-                [0, 0],
-                10,
-                np.random.randint(-5, 5, size=(2 * 2, 10), dtype=np.int8),
-            ),
+            # NOTE: the first layer is not likely to be a pooling layer. So we
+            # don't support padding for pooling layers.
+            # n_pool = 2
+            ((3, 24, 24), 2, [2, 2], [1, 1], (2, 2), "avg"),
+            ((3, 24, 24), 2, [(2, 2), (2, 2)], [None, None], (10,), "avg"),
+            ((6, 32, 32), 2, [3, 3], [None, None], (10,), "avg"),
+            ((3, 24, 24), 2, [2, 2], [1, 1], (4,), "max"),
+            ((3, 24, 24), 2, [(2, 2), (2, 2)], [2, 2], (10,), "max"),
+            ((6, 32, 32), 2, [3, 3], [None, None], (10,), "max"),
+            # n_pool = 3
+            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], (10,), "avg"),
+            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], (10,), "max"),
         ],
     )
-    def test_Conv2dSemiFolded_FC_Net2(
-        self, shape, kernel, stride, padding, out_features, weight
+    def test_Pool2dSemiFolded_FC_ChainNet(
+        self, ishape_chw, n_pool, kshape_hw, stride, out_features, pool_type, fixed_rng
     ):
-        from tests.shared_networks import Conv2dSemiFolded_FC_Net2
+        from tests.shared_networks import Pool2dSemiFolded_FC_ChainNetN
 
-        net2 = Conv2dSemiFolded_FC_Net2(
-            shape, kernel, stride, padding, out_features, weight
-        )
-        conv2d = net2.conv2
-        linear = net2.linear1
-        generated = DynSysGroup.build_fmodule(net2)
-        # sim1 = pb.Simulator(net1, start_time_zero=False)
-        sim2 = pb.Simulator(net2, start_time_zero=False)
+        assert n_pool == len(kshape_hw) == len(stride)
+        ksizes = []
+        strides = []
+        paddings = [(0, 0) for _ in range(n_pool)]
+        ocs = []
+        ohs = []
+        ows = []
 
-        probe_conv = pb.Probe(generated[conv2d][0], "output")
-        probe_linear = pb.Probe(generated[linear][0], "output")
-        sim2.add_probe(probe_conv)
-        sim2.add_probe(probe_linear)
-        inpa = np.random.randint(0, 5, size=(1, 11, 11)).astype(np.int8)
-        inpb = np.concatenate([inpa, np.zeros((1, 10, 11))], axis=1)
-        for i in range(17):
-            pb.FRONTEND_ENV.save(data1=inpb[0][i])
-            sim2.run(1)
-        expected = _conv2d_faster_fp32(
-            np.transpose(inpa, (0, 2, 1)), kernel, _pair(stride[0]), _pair(padding[0])
-        )
-        expected[expected < 0] = 0
+        for i_pool in range(n_pool):
+            k, s = kshape_hw[i_pool], stride[i_pool]
 
-        expected = _conv2d_faster_fp32(
-            expected, kernel, _pair(stride[1]), _pair(padding[1])
-        )
-        expected[expected < 0] = 0
+            _ksize = _pair(k)
+            _stride = _pair(s) if s is not None else _ksize
+            ksizes.append(_ksize)
+            strides.append(_stride)
 
-        expected = np.array(expected, dtype=np.int32)
-        expected = expected.ravel() @ weight
-        expected[expected < 0] = 0
-        if (expected >> 8).all() > 0:
-            expected = np.full_like(expected, ((1 << 8) - 1))
-        else:
-            expected = expected & ((1 << 8) - 1)
-        # expected = np.clip(expected, 0, 7)
-        assert np.array_equal(expected, sim2.data[probe_linear][15])
+            ih = ishape_chw[1] if i_pool == 0 else ohs[-1]
+            iw = ishape_chw[2] if i_pool == 0 else ows[-1]
+            oc = ishape_chw[0]
+            oh = (ih - _ksize[0]) // _stride[0] + 1
+            ow = (iw - _ksize[1]) // _stride[1] + 1
+            ocs.append(oc)
+            ohs.append(oh)
+            ows.append(ow)
 
-    @pytest.mark.skip(reason="not implemented yet")
-    @pytest.mark.parametrize(
-        "shape, kernel_size, stride, weight, pool_type",
-        [
-            (
-                (1, 8),
-                (2, 2),
-                [1, 1],
-                np.random.randint(-5, 5, size=(6 * 6, 2), dtype=np.int8),
-                "avg",
-            ),
-            (
-                (1, 8),
-                (2, 2),
-                [2, 2],
-                np.random.randint(-5, 5, size=(2 * 2, 2), dtype=np.int8),
-                "avg",
-            ),
-            (
-                (1, 8),
-                (2, 2),
-                [1, 1],
-                np.random.randint(0, 5, size=(6 * 6, 2), dtype=np.int8),
-                "max",
-            ),
-            (
-                (1, 8),
-                (2, 2),
-                [2, 2],
-                np.random.randint(0, 5, size=(2 * 2, 2), dtype=np.int8),
-                "max",
-            ),
-        ],
-    )
-    def test_Pool2dSemiMap(self, shape, kernel_size, stride, weight, pool_type):
-        from tests.shared_networks import Pool2dSemiMap_Net
+        fc_weight = fixed_rng.integers(
+            -4,
+            5,
+            size=(ocs[-1] * ohs[-1] * ows[-1], shape2num(out_features)),
+            dtype=WEIGHT_DTYPE,
+        )
 
-        net1 = Pool2dSemiMap_Net(shape, kernel_size, stride, weight, pool_type)
-        pool = net1.pool2
+        net1 = Pool2dSemiFolded_FC_ChainNetN(
+            ishape_chw[:2],
+            ksizes,
+            strides,
+            paddings,
+            out_features,
+            fc_weight,
+            pool_type,
+        )
+        # `net1.pool_list` will be removed in `build_fmodule`
+        pool2d_list = net1.pool_list.copy()
         linear = net1.linear1
         generated = DynSysGroup.build_fmodule(net1)
         sim1 = pb.Simulator(net1, start_time_zero=False)
+
+        probe_pool_list = []
+        for poool2d in pool2d_list:
+            probe = pb.Probe(generated[poool2d][0], "output")
+            probe_pool_list.append(probe)
+            sim1.add_probe(probe)
+
         probe_linear = pb.Probe(generated[linear][0], "output")
-        probe_pool = pb.Probe(generated[pool][0], "output")
-        sim1.add_probe(probe_pool)
         sim1.add_probe(probe_linear)
-        inpa = np.random.randint(0, 10, size=(1, 8, 8)).astype(np.int8)
-        inpb = np.concatenate([inpa, np.zeros((1, 10, 8))], axis=1)
-        for i in range(13):
-            pb.FRONTEND_ENV.save(data1=inpb[:, i, :])
-            sim1.run(1)
-        if pool_type == "max":
-            expected = max_pooling(np.transpose(inpa, (0, 2, 1)), kernel_size, stride)
-            expected = max_pooling(expected, kernel_size, stride)
-            expected = np.array(expected, dtype=np.int32)
-            expected = expected.ravel() @ weight
-            if (expected >> 8).all() > 0:
-                expected = np.full_like(expected, ((1 << 8) - 1))
-            else:
-                expected = expected & ((1 << 8) - 1)
-            assert np.array_equal(expected, sim1.data[probe_linear][12])
-        else:
-            expected = avg_pooling(np.transpose(inpa, (0, 2, 1)), kernel_size, stride)
-            expected = avg_pooling(expected, kernel_size, stride)
-            expected = np.array(expected, dtype=np.int32)
-            expected = expected.ravel() @ weight
-            expected[expected < 0] = 0
-            if (expected >> 8).all() > 0:
-                expected = np.full_like(expected, ((1 << 8) - 1))
+
+        semi_folded_modules = [*pool2d_list, linear]
+        semi_valid_interval = []
+        for m in semi_folded_modules:
+            semi_valid_interval.append(m.valid_interval)
+
+        ts_1st_valid = [0] * n_pool
+        for i in range(n_pool):
+            if i == 0:
+                ts_1st_valid[i] = ksizes[0][-1] * semi_valid_interval[0]
             else:
-                expected = expected & ((1 << 8) - 1)
-            assert np.array_equal(expected, sim1.data[probe_linear][12])
+                ts_1st_valid[i] = (
+                    ts_1st_valid[i - 1] + (ksizes[i][-1] - 1) * semi_valid_interval[i]
+                )
+
+        n_test = 3  # can be more
+        _pool_op = {"avg": avgpool2d_golden, "max": maxpool2d_golden}
+
+        for _ in range(n_test):
+            sim1.reset()
+            inpa = fixed_rng.integers(256, size=ishape_chw).astype(NEUOUT_U8_DTYPE)
+            inp_pad0 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+
+            for i in range(inp_pad0.shape[-1]):
+                pb.FRONTEND_ENV.save(data1=inp_pad0[:, :, i])
+                sim1.run(1)
+
+            x = inpa
+            for i_pool in range(n_pool):
+                x = _ann_bit_trunc(
+                    _pool_op[pool_type](
+                        x, ksizes[i_pool], strides[i_pool], paddings[i_pool]
+                    )
+                )
+
+                # Check the result of semi-folded pooling.
+                for i in range(ows[i_pool]):
+                    assert np.array_equal(
+                        x[:, :, i].ravel(),
+                        sim1.data[probe_pool_list[i_pool]][
+                            pool2d_list[i_pool].tick_wait_start
+                            + ts_1st_valid[i_pool]
+                            + i * semi_valid_interval[i_pool + 1]
+                            - 1
+                        ],
+                    )
+
+            # x is the reference result of the last pooling.
+            expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
+            # Check the result of semi-folded linear.
+            assert np.array_equal(
+                expected_fc_t,
+                sim1.data[probe_linear][
+                    linear.tick_wait_start
+                    + ts_1st_valid[-1]
+                    + (ows[-1] - 1) * semi_valid_interval[-1]
+                    - 1
+                ],
+            )
 
     @pytest.mark.parametrize(
         "shape, weight",
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 2efd6197..19a00dce 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -288,31 +288,31 @@ def __init__(self, shape, kernels, strides, paddings, out_features, weight):
 }
 
 
-class Pool2dSemiMap_Net(pb.DynSysGroup):
-    def __init__(self, shape, kernel_size, stride, weight, pool_type):
+class Pool2dSemiFolded_FC_ChainNetN(pb.DynSysGroup):
+    def __init__(
+        self, shape, kernel_sizes, strides, paddings, out_features, weight, pool_type
+    ):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
-        if pool_type == "avg":
-            self.pool1 = pb.AvgPool2dSemiMap(
-                self.i1, kernel_size, stride[0], tick_wait_start=1
-            )
-            self.pool2 = pb.AvgPool2dSemiMap(
-                self.pool1, kernel_size, stride[1], tick_wait_start=3
-            )
-        else:
-            self.pool1 = pb.MaxPool2dSemiMap(
-                self.i1, kernel_size, stride[0], tick_wait_start=1
-            )
-            self.pool2 = pb.MaxPool2dSemiMap(
-                self.pool1, kernel_size, stride[1], tick_wait_start=3
+        self.pool_list = NodeList()
+
+        for i, (ksize, stride) in enumerate(zip(kernel_sizes, strides)):
+            self.pool_list.append(
+                _pool_semi_op[pool_type](
+                    self.pool_list[-1] if i > 0 else self.i1,
+                    ksize,
+                    stride,
+                    tick_wait_start=1 + 2 * i,
+                )
             )
+
         self.linear1 = pb.LinearSemiFolded(
-            self.pool2,
-            2,
+            self.pool_list[-1],
+            out_features,
             weights=weight,
             bias=0,
             conn_type=pb.SynConnType.All2All,
-            tick_wait_start=5,
+            tick_wait_start=self.pool_list[-1].tick_wait_start + 2,
         )
 
 

From 9fe277b25175d37d512dbb800f3ade6a87687676 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 6 Sep 2024 19:58:11 +0800
Subject: [PATCH 077/187] =?UTF-8?q?=E2=9C=85=20improve=20efficiency=20for?=
 =?UTF-8?q?=20testing=20semi-folded=20conv=20+=20linear.=20Padding=20for?=
 =?UTF-8?q?=20the=20first=20conv=20remains=20untested?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 89 ++++++++++++-----------------
 1 file changed, 38 insertions(+), 51 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 86ef22e1..ac90b32e 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -830,46 +830,34 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.export(fp=ensure_dump_dir)
 
     @pytest.mark.parametrize(
-        "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features",
+        # NOTE: Only support padding in the first semi-folded conv2d for now.
+        "ishape_chw, n_conv, kshape_oihw, stride, padding1, out_features",
         [
             # n_conv = 1
-            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], [0], (10,)),
-            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], [0], (10,)),
-            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], [0], (10,)),
-            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], [0], (4, 2)),
-            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], [0], 10),
-            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], [0], (3, 3)),
-            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], [0], (10,)),
-            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10),
+            ((3, 11, 11), 1, [(12, 3, 3, 3)], [1], 0, (10,)),
+            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], 0, (10,)),
+            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], 0, (10,)),
+            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], 0, (10,)),
+            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], 0, (4, 2)),
+            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], 0, 10),
+            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], 0, (3, 3)),
+            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], 0, (10,)),
+            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], 0, 10),
             # n_conv = 2
-            (
-                (4, 32, 32),
-                2,
-                [(8, 4, 3, 3), (12, 8, 4, 4)],
-                [(2, 2), (2, 2)],
-                [0, 0],
-                10,
-            ),
-            (
-                (4, 32, 32),
-                2,
-                [(8, 4, 3, 3), (12, 8, 4, 4)],
-                [(2, 2), (1, 1)],
-                [0, 0],
-                10,
-            ),
-            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [0, 0], 10),
-            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [0, 0], 10),
-            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], [0, 0], 10),
-            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], [0, 0], 10),
-            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [2, 2], [0, 0], 10),
+            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (2, 2)], 0, 10),
+            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (1, 1)], 0, 10),
+            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], 0, 10),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], 0, 10),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], 0, 10),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], 0, 10),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [2, 2], 0, 10),
             # n_conv = 3
             (
                 (4, 32, 32),
                 3,
                 [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
                 [2, 1, 1],
-                [0, 0, 0],
+                0,
                 3,
             ),
             (
@@ -877,7 +865,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 3,
                 [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
                 [1, 1, 1],
-                [0, 0, 0],
+                0,
                 10,
             ),
         ],
@@ -888,55 +876,55 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         n_conv,
         kshape_oihw,
         stride,
-        padding,
+        padding1,
         out_features,
-        random_fixture,
+        fixed_rng,
     ):
         """Test the network with N semi-folded conv2d + 1 semi-folded linear."""
         from tests.shared_networks import Conv2dSemiFolded_FC_ChainNetN
 
-        assert n_conv == len(kshape_oihw) == len(stride) == len(padding)
+        assert n_conv == len(kshape_oihw) == len(stride)
+        assert ishape_chw[0] == kshape_oihw[0][1]
+
         kernels = []
         strides = []
-        paddings = []
+        paddings = [_pair(padding1)] + [(0, 0) for _ in range(1, n_conv)]
         ocs = []
         ohs = []
         ows = []
 
         for i_conv in range(n_conv):
-            kshape, s, p = kshape_oihw[i_conv], stride[i_conv], padding[i_conv]
+            kshape, s = kshape_oihw[i_conv], stride[i_conv]
 
             k = np.random.randint(-3, 4, size=kshape, dtype=WEIGHT_DTYPE)
             _stride = _pair(s)
-            _padding = _pair(p)
             kernels.append(k)
             strides.append(_stride)
-            paddings.append(_padding)
 
             ih = ishape_chw[1] if i_conv == 0 else ohs[-1]
             iw = ishape_chw[2] if i_conv == 0 else ows[-1]
             oc = kshape[0]
-            oh = (ih + 2 * _padding[0] - kshape[2]) // _stride[0] + 1
-            ow = (iw + 2 * _padding[1] - kshape[3]) // _stride[1] + 1
+            oh = (ih - kshape[2]) // _stride[0] + 1
+            ow = (iw - kshape[3]) // _stride[1] + 1
             ocs.append(oc)
             ohs.append(oh)
             ows.append(ow)
 
-        fc_weight = np.random.randint(
+        fc_weight = fixed_rng.integers(
             -4,
             5,
             size=(ocs[-1] * ohs[-1] * ows[-1], shape2num(out_features)),
             dtype=WEIGHT_DTYPE,
         )
 
-        net2 = Conv2dSemiFolded_FC_ChainNetN(
+        net1 = Conv2dSemiFolded_FC_ChainNetN(
             ishape_chw[:2], kernels, strides, paddings, out_features, fc_weight
         )
-        # `conv_list` will be removed in `build_fmodule`
-        conv2d_list = net2.conv_list.copy()
-        linear = net2.linear1
-        generated = DynSysGroup.build_fmodule(net2)
-        sim1 = pb.Simulator(net2, start_time_zero=False)
+        # `net1.conv_list` will be removed in `build_fmodule`
+        conv2d_list = net1.conv_list.copy()
+        linear = net1.linear1
+        generated = DynSysGroup.build_fmodule(net1)
+        sim1 = pb.Simulator(net1, start_time_zero=False)
 
         probe_conv_list = []
         for conv2d in conv2d_list:
@@ -965,7 +953,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         n_test = 3  # can be more
         for _ in range(n_test):
             sim1.reset()
-            inpa = np.random.randint(0, 3, size=ishape_chw, dtype=VOLTAGE_DTYPE)
+            inpa = fixed_rng.integers(0, 4, size=ishape_chw, dtype=NEUOUT_U8_DTYPE)
             inp_pad0 = np.concatenate(
                 [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
             )
@@ -987,7 +975,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                 )
 
                 # Check the result of semi-folded convolutions.
-                for i in range(ow):
+                for i in range(ows[i_conv]):
                     assert np.array_equal(
                         x[:, :, i].ravel(),
                         sim1.data[probe_conv_list[i_conv]][
@@ -1000,7 +988,6 @@ def test_Conv2dSemiFolded_FC_ChainNet(
 
             # x is the reference result of the last convolution.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
-
             # Check the result of semi-folded linear.
             assert np.array_equal(
                 expected_fc_t,

From c5d2848c73f752e37a90f6ef1a9875c18d33b8ab Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Thu, 19 Sep 2024 00:26:36 +0800
Subject: [PATCH 078/187] support padding

---
 paibox/components/_modules.py            | 12 +--
 paibox/components/functional.py          | 79 +++++++++++++++----
 paibox/components/synapses/conv_utils.py | 16 ++--
 paibox/network.py                        |  5 +-
 tests/components/test_functional.py      | 97 ++++++++++++++++--------
 5 files changed, 150 insertions(+), 59 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index f98e9692..19c04d45 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -167,17 +167,19 @@ def build(
 class _SemiFoldedModule(FunctionalModule, _HasSemiFoldedIntf):
     valid_interval: int = 1
     """The interval of valid output data"""
+    ts_1st_valid: int = 0
 
     @staticmethod
     def _w_padding_check(w_padding: int, prev_node: Union[NeuDyn, InputProj]) -> None:
         # NOTE: Only support padding in the first semi-folded conv2d for now.
         # In fact, it is rare for the H & W directions to be padded unequally.
         # TODO Support H padding
-        if w_padding > 0 and not isinstance(prev_node, InputProj):
-            raise NotSupportedError(
-                "only semi-folded convolutions that connect input projection "
-                "are supported to have padding in the W direction."
-            )
+        # if w_padding > 0 and not isinstance(prev_node, InputProj):
+        #     raise NotSupportedError(
+        #         "only semi-folded convolutions that connect input projection "
+        #         "are supported to have padding in the W direction."
+        #     )
+        return
 
 
 class _LinearBase(FunctionalModule):
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index e67d5bd6..36d47d5a 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -7,7 +7,7 @@
 from paicorelib import NTM, RM, TM
 
 from paibox.base import NeuDyn, NodeList
-from paibox.exceptions import PAIBoxDeprecationWarning, ShapeError
+from paibox.exceptions import PAIBoxDeprecationWarning, ShapeError, ResourceError
 from paibox.network import DynSysGroup
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -871,7 +871,8 @@ def build(
         self.valid_interval = valid_interval
 
         in_ch, in_h = self.module_intf.operands[0].shape_out
-
+        if in_ch * in_h * in_h * valid_interval > 18432:
+            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
@@ -967,7 +968,7 @@ def __init__(
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
 
         if in_ch != cin:
-            raise ShapeError(f"input channels mismatch: {in_ch} != {cin}.")
+            raise ShapeError(f"The channels mismatch: {in_ch} != {cin}.")
 
         super().__init__(
             neuron_s,
@@ -981,7 +982,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, input_valid: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -990,11 +991,18 @@ def build(
         #     )
         #     self.module_intf.operands[0].shape_change((in_ch, in_h))
         self.valid_interval = valid_interval
-
         _, in_h = self.module_intf.operands[0].shape_out
         _, cin, _, kw = self.kernel.shape
-
+        ts_1st_valid = (
+                input_valid
+                + (kw - 1 - self.padding[0]) * valid_interval
+        )
+        self.ts_1st_valid = ts_1st_valid
+        tick_wait_end = 1 + ts_1st_valid + (self.shape_out[1]-1) * valid_interval * self.stride[1]
+        if cin * in_h * kw * valid_interval > 18432:
+            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
         n_delays = NodeList()
+        n_copies = NodeList()
         s_delays = NodeList()
         s_kernel = NodeList()
 
@@ -1004,7 +1012,7 @@ def build(
             self.bit_trunc,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
-            tick_wait_end=self.tick_wait_end,
+            tick_wait_end=tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
@@ -1014,9 +1022,9 @@ def build(
                 (cin, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=self.tick_wait_end,
+                tick_wait_end=tick_wait_end,
                 keep_shape=self.keep_shape,
-                name=f"n{i}_{self.name}",
+                name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
             # delay synapses
@@ -1040,6 +1048,38 @@ def build(
             )
             s_kernel.append(syn2)
 
+        if input_valid > 0:
+            for i in range(self.padding[0]):
+                neuron = ANNBypassNeuron(
+                    (cin, in_h),
+                    delay=valid_interval * (kw-1-i) + 1,
+                    tick_wait_start=self.tick_wait_start,
+                    tick_wait_end=input_valid,
+                    keep_shape=self.keep_shape,
+                    name=f"n{i}_copy_{self.name}",
+                )
+
+                n_copies.append(neuron)
+                # delay synapses
+                syn1 = FullConnSyn(
+                    self.module_intf.operands[0],
+                    n_copies[i],
+                    weights=_delay_mapping(in_h, cin),
+                    conn_type=ConnType.All2All,
+                    name=f"s{i}_copy_{self.name}",
+                )
+                s_delays.append(syn1)
+
+                syn2 = Conv2dSemiFoldedSyn(  # cin, ih -> cout * oh
+                    n_copies[i],
+                    n_conv2d,
+                    -(self.kernel[:, :, :, i]),
+                    self.stride,
+                    self.padding,
+                    "OIL",
+                    name=f"neg_s{i}_{self.name}",
+                )
+                s_kernel.append(syn2)
         generated = [n_conv2d, *n_delays, *s_delays, *s_kernel]
         self._rebuild_out_intf(network, n_conv2d, *generated, **build_options)
 
@@ -1195,7 +1235,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, input_valid: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -1209,6 +1249,16 @@ def build(
         cin = in_ch
         _, kw = self.kernel_size
 
+        ts_1st_valid = (
+                input_valid
+                + (kw - 1) * valid_interval
+        )
+        self.ts_1st_valid = ts_1st_valid
+        tick_wait_end = 1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+
+        if cin * in_h * kw * valid_interval > 18432:
+            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+
         n_delays = NodeList()
         s_delays = NodeList()
 
@@ -1216,7 +1266,7 @@ def build(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
-            tick_wait_end=self.tick_wait_end,
+            tick_wait_end=tick_wait_end,
             pool_max=True,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
@@ -1227,7 +1277,7 @@ def build(
                 (cin, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=self.tick_wait_end,
+                tick_wait_end=tick_wait_end,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1298,7 +1348,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, **build_options
+        self, network: DynSysGroup, valid_interval: int, input_valid: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -1311,6 +1361,9 @@ def build(
         in_ch, in_h = self.module_intf.operands[0].shape_out
         cin = in_ch
         kh, kw = self.kernel_size
+        if cin * in_h * kw * valid_interval > 18432:
+            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+
         # NOTE: Division is achieved with the help of truncation operation.
         # It can only be approximated to a power of an integer of 2.
         bit_trunc = 8 + (kh * kw).bit_length() - 1
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index fd3f561f..6297765f 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -193,22 +193,22 @@ def _conv2d_semifolded_unroll(
     cout, cin, kh = kernel.shape
     ih = in_shape[1] + 2 * padding[0]
     _, oh = out_shape
-    w_np = np.zeros((cin * ih, cout * oh), dtype=kernel.dtype)
-
+    w_np = np.zeros((cin * in_shape[1], cout * oh), dtype=kernel.dtype)
     for i in range(cout):
         for j in range(cin):
+            w_block = np.zeros((ih, oh), dtype=kernel.dtype)
             for k in range(oh):
-                w_np[
-                    j * ih + k * stride[1] : j * ih + k * stride[1] + kh, i * oh + k
+                w_block[
+                    k * stride[1] : k * stride[1] + kh, k
                 ] = kernel[i, j, :]
 
-            if padding[0] > 0: # H direction
-                w_np = np.delete(
-                    w_np,
+            if padding[0] > 0:# H direction
+                w_block = np.delete(
+                    w_block,
                     np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
                     axis=0,
                 )
-
+            w_np[j*in_shape[1]: (j+1)*in_shape[1], i*oh : (i+1)*oh] = w_block
     return w_np
 
 
diff --git a/paibox/network.py b/paibox/network.py
index d28d942f..f3fc6da4 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -93,15 +93,16 @@ def build_fmodule(
         # If the input data is input continuously on the W-axis, the initial
         # valid interval for the first semi-folded component is 1.
         semi_valid_interval = 1
-
+        ts_1st_valid = 0
         for module in modules.values():
             if isinstance(
                 module, (Conv2dSemiFolded, MaxPool2dSemiFolded, AvgPool2dSemiFolded)
             ):
                 generated[module] = module.build(
-                    network, semi_valid_interval, **build_options
+                    network, semi_valid_interval, ts_1st_valid, **build_options
                 )
                 semi_valid_interval *= module.stride[1]
+                ts_1st_valid = module.ts_1st_valid
             elif isinstance(module, LinearSemiFolded):
                 generated[module] = module.build(
                     network, semi_valid_interval, **build_options
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index ac90b32e..2526536c 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -831,33 +831,35 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
 
     @pytest.mark.parametrize(
         # NOTE: Only support padding in the first semi-folded conv2d for now.
-        "ishape_chw, n_conv, kshape_oihw, stride, padding1, out_features",
+        "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features",
         [
             # n_conv = 1
-            ((3, 11, 11), 1, [(12, 3, 3, 3)], [1], 0, (10,)),
-            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], 0, (10,)),
-            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], 0, (10,)),
-            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], 0, (10,)),
-            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], 0, (4, 2)),
-            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], 0, 10),
-            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], 0, (3, 3)),
-            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], 0, (10,)),
-            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], 0, 10),
+            ((3, 11, 11), 1, [(1, 3, 3, 3)], [1], [1], (10,)),
+            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], [2], (10,)),
+            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], [2], (10,)),
+            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], [1], (10,)),
+            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], [0], (4, 2)),
+            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], [0], 10),
+            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], [0], (3, 3)),
+            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], [0], (10,)),
+            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10),
             # n_conv = 2
-            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (2, 2)], 0, 10),
-            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (1, 1)], 0, 10),
-            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], 0, 10),
-            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], 0, 10),
-            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], 0, 10),
-            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], 0, 10),
-            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [2, 2], 0, 10),
+            ((1, 5, 5), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10),
+            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (2, 2)], [1, 1], 10),
+            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (1, 1)], [1, 2], 10),
+            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [2, 2], 10),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], [2, 2], 10),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], [2, 1], 10),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
+
             # n_conv = 3
             (
                 (4, 32, 32),
                 3,
                 [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
-                [2, 1, 1],
-                0,
+                [1, 1, 1],
+                [1, 1, 1],
                 3,
             ),
             (
@@ -865,9 +867,34 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 3,
                 [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
                 [1, 1, 1],
-                0,
+                [1, 0, 1],
                 10,
             ),
+            (
+                    (1, 224, 224),
+                    3,
+                    [(1, 1, 7, 7), (1, 1, 5, 5), (1, 1, 3, 3)],
+                    [2, 2, 2],
+                    [3, 2, 1],
+                    10,
+            ),
+            (
+                    (3, 32, 32),
+                    3,
+                    [(3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3)],
+                    [1, 2, 1],
+                    [1, 0, 1],
+                    10,
+            ),
+            # n_conv = 5
+            (
+                    (3, 32, 32),
+                    5,
+                    [(3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3)],
+                    [1, 2, 1, 2, 1],
+                    [1, 0, 1, 0, 1],
+                    10,
+            ),
         ],
     )
     def test_Conv2dSemiFolded_FC_ChainNet(
@@ -876,7 +903,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         n_conv,
         kshape_oihw,
         stride,
-        padding1,
+        padding,
         out_features,
         fixed_rng,
     ):
@@ -888,24 +915,26 @@ def test_Conv2dSemiFolded_FC_ChainNet(
 
         kernels = []
         strides = []
-        paddings = [_pair(padding1)] + [(0, 0) for _ in range(1, n_conv)]
+        paddings = []
         ocs = []
         ohs = []
         ows = []
 
         for i_conv in range(n_conv):
-            kshape, s = kshape_oihw[i_conv], stride[i_conv]
+            kshape, s, p = kshape_oihw[i_conv], stride[i_conv], padding[i_conv]
 
             k = np.random.randint(-3, 4, size=kshape, dtype=WEIGHT_DTYPE)
             _stride = _pair(s)
+            _padding = _pair(p)
             kernels.append(k)
             strides.append(_stride)
+            paddings.append(_padding)
 
             ih = ishape_chw[1] if i_conv == 0 else ohs[-1]
             iw = ishape_chw[2] if i_conv == 0 else ows[-1]
             oc = kshape[0]
-            oh = (ih - kshape[2]) // _stride[0] + 1
-            ow = (iw - kshape[3]) // _stride[1] + 1
+            oh = (ih - kshape[2] + 2 * paddings[i_conv][0]) // _stride[0] + 1
+            ow = (iw - kshape[3] + 2 * paddings[i_conv][0]) // _stride[1] + 1
             ocs.append(oc)
             ohs.append(oh)
             ows.append(ow)
@@ -943,20 +972,25 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         ts_1st_valid = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
-                ts_1st_valid[i] = kshape_oihw[0][-1] * semi_valid_interval[0]
+                ts_1st_valid[i] = (kshape_oihw[0][-1] - padding[0]) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
                     ts_1st_valid[i - 1]
-                    + (kshape_oihw[i][-1] - 1) * semi_valid_interval[i]
+                    + (kshape_oihw[i][-1] - 1 - padding[i]) * semi_valid_interval[i]
                 )
 
         n_test = 3  # can be more
         for _ in range(n_test):
             sim1.reset()
             inpa = fixed_rng.integers(0, 4, size=ishape_chw, dtype=NEUOUT_U8_DTYPE)
-            inp_pad0 = np.concatenate(
-                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
-            )
+            if inpa.shape[-1] < 10:
+                inp_pad0 = np.concatenate(
+                    [inpa, np.zeros((inpa.shape[0], inpa.shape[1], 15), dtype=inpa.dtype)], axis=2, dtype=inpa.dtype
+                )
+            else:
+                inp_pad0 = np.concatenate(
+                    [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+                )
 
             for i in range(inp_pad0.shape[-1]):
                 pb.FRONTEND_ENV.save(data1=inp_pad0[:, :, i])
@@ -974,7 +1008,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                     )
                 )
 
-                # Check the result of semi-folded convolutions.
+                #Check the result of semi-folded convolutions.
                 for i in range(ows[i_conv]):
                     assert np.array_equal(
                         x[:, :, i].ravel(),
@@ -988,6 +1022,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
 
             # x is the reference result of the last convolution.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
+
             # Check the result of semi-folded linear.
             assert np.array_equal(
                 expected_fc_t,

From e2a9379c1189ca2a32233208fcc2f08d4e161858 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 19 Sep 2024 01:36:51 +0000
Subject: [PATCH 079/187] :rotating_light: auto fix by pre-commit hooks

---
 docs/Guide-of-Test.md                    | 12 ++--
 paibox/__init__.py                       | 33 ++++++-----
 paibox/components/functional.py          | 60 +++++++++++++-------
 paibox/components/neuron/neurons.py      |  3 +-
 paibox/components/synapses/__init__.py   |  7 ++-
 paibox/components/synapses/conv_utils.py | 10 ++--
 paibox/components/synapses/transforms.py |  2 +-
 paibox/network.py                        |  4 +-
 tests/components/test_functional.py      | 72 +++++++++++++++---------
 tests/utils.py                           |  1 -
 10 files changed, 122 insertions(+), 82 deletions(-)

diff --git a/docs/Guide-of-Test.md b/docs/Guide-of-Test.md
index 6eb3724e..8146b0b4 100644
--- a/docs/Guide-of-Test.md
+++ b/docs/Guide-of-Test.md
@@ -125,13 +125,13 @@ pytest = "^8.0.0"
 
 6. 固定种子的随机数生成器。该夹具返回一个固定的随机数生成器，通过该生成器生成的随机数可复现。
 
-    ```python
-    @pytest.fixture
-    def fixed_rng() -> np.random.Generator:
-        return np.random.default_rng(42)
+   ```python
+   @pytest.fixture
+   def fixed_rng() -> np.random.Generator:
+       return np.random.default_rng(42)
 
-    def test_foo(fixed_rng):
-        fixed_rng.random(...)
+   def test_foo(fixed_rng):
+       fixed_rng.random(...)
    ```
 
 ## 更多
diff --git a/paibox/__init__.py b/paibox/__init__.py
index 0aec89a4..df0bfe40 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -4,11 +4,17 @@
 from .backend import BACKEND_CONFIG as BACKEND_CONFIG
 from .backend import Mapper as Mapper
 
+# Functional modules in ANN mode only
 # Functional modules
+from .components.functional import AvgPool2dSemiFolded as AvgPool2dSemiFolded
 from .components.functional import BitwiseAND as BitwiseAND
 from .components.functional import BitwiseNOT as BitwiseNOT
 from .components.functional import BitwiseOR as BitwiseOR
 from .components.functional import BitwiseXOR as BitwiseXOR
+from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
+from .components.functional import Linear as Linear
+from .components.functional import LinearSemiFolded as LinearSemiFolded
+from .components.functional import MaxPool2dSemiFolded as MaxPool2dSemiFolded
 from .components.functional import SpikingAdd as SpikingAdd
 from .components.functional import SpikingAvgPool1d as SpikingAvgPool1d
 from .components.functional import SpikingAvgPool1dWithV as SpikingAvgPool1dWithV
@@ -20,22 +26,15 @@
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
 
-# Functional modules in ANN mode only
-from .components.functional import Linear as Linear
-from .components.functional import LinearSemiFolded as LinearSemiFolded
-from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
-from .components.functional import AvgPool2dSemiFolded as AvgPool2dSemiFolded
-from .components.functional import MaxPool2dSemiFolded as MaxPool2dSemiFolded
-
 # Reduced neurons
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
+from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
+from .components.neuron.neurons import ANNNeuron as ANNNeuron
+from .components.neuron.neurons import BypassNeuron as BypassNeuron
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
 from .components.neuron.neurons import SpikingRelu as SpikingRelu
 from .components.neuron.neurons import TonicSpiking as TonicSpiking
-from .components.neuron.neurons import BypassNeuron as BypassNeuron
-from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
-from .components.neuron.neurons import ANNNeuron as ANNNeuron
 
 # Input projection
 from .components.projection import InputProj as InputProj
@@ -49,21 +48,21 @@
 from .components.synapses.synapses import FullConn as FullConn
 from .components.synapses.synapses import MatMul2d as MatMul2d
 
+# Frontend context
+from .context import FRONTEND_ENV as FRONTEND_ENV
+
 # Network
 from .network import DynSysGroup as DynSysGroup
 from .network import Network  # alias for DynSysGroup
 
-# Simulation
-from .simulator import Probe as Probe
-from .simulator import Simulator as Simulator
-
-# Frontend context
-from .context import FRONTEND_ENV as FRONTEND_ENV
-
 # Auxiliary containers
 from .node import NodeDict as NodeDict
 from .node import NodeList as NodeList
 
+# Simulation
+from .simulator import Probe as Probe
+from .simulator import Simulator as Simulator
+
 try:
     __version__ = version("paibox")
 except Exception:
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 36d47d5a..147ff927 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -7,15 +7,15 @@
 from paicorelib import NTM, RM, TM
 
 from paibox.base import NeuDyn, NodeList
-from paibox.exceptions import PAIBoxDeprecationWarning, ShapeError, ResourceError
+from paibox.exceptions import PAIBoxDeprecationWarning, ResourceError, ShapeError
 from paibox.network import DynSysGroup
 from paibox.types import (
     LEAK_V_DTYPE,
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
     WEIGHT_DTYPE,
-    IntScalarType,
     DataType,
+    IntScalarType,
     NeuOutType,
     VoltageType,
     WeightType,
@@ -37,7 +37,7 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, FullConnSyn, Conv2dSemiFoldedSyn, MaxPool2dSemiFoldedSyn
+from .synapses import ConnType, Conv2dSemiFoldedSyn, FullConnSyn, MaxPool2dSemiFoldedSyn
 from .synapses.conv_types import _Size1Type, _Size2Type
 from .synapses.conv_utils import _pair
 
@@ -872,7 +872,9 @@ def build(
 
         in_ch, in_h = self.module_intf.operands[0].shape_out
         if in_ch * in_h * in_h * valid_interval > 18432:
-            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+            raise ResourceError(
+                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
+            )
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
@@ -982,7 +984,11 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, input_valid: int, **build_options
+        self,
+        network: DynSysGroup,
+        valid_interval: int,
+        input_valid: int,
+        **build_options,
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -993,14 +999,15 @@ def build(
         self.valid_interval = valid_interval
         _, in_h = self.module_intf.operands[0].shape_out
         _, cin, _, kw = self.kernel.shape
-        ts_1st_valid = (
-                input_valid
-                + (kw - 1 - self.padding[0]) * valid_interval
-        )
+        ts_1st_valid = input_valid + (kw - 1 - self.padding[0]) * valid_interval
         self.ts_1st_valid = ts_1st_valid
-        tick_wait_end = 1 + ts_1st_valid + (self.shape_out[1]-1) * valid_interval * self.stride[1]
+        tick_wait_end = (
+            1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        )
         if cin * in_h * kw * valid_interval > 18432:
-            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+            raise ResourceError(
+                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
+            )
         n_delays = NodeList()
         n_copies = NodeList()
         s_delays = NodeList()
@@ -1052,7 +1059,7 @@ def build(
             for i in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, in_h),
-                    delay=valid_interval * (kw-1-i) + 1,
+                    delay=valid_interval * (kw - 1 - i) + 1,
                     tick_wait_start=self.tick_wait_start,
                     tick_wait_end=input_valid,
                     keep_shape=self.keep_shape,
@@ -1235,7 +1242,11 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, input_valid: int, **build_options
+        self,
+        network: DynSysGroup,
+        valid_interval: int,
+        input_valid: int,
+        **build_options,
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -1249,15 +1260,16 @@ def build(
         cin = in_ch
         _, kw = self.kernel_size
 
-        ts_1st_valid = (
-                input_valid
-                + (kw - 1) * valid_interval
-        )
+        ts_1st_valid = input_valid + (kw - 1) * valid_interval
         self.ts_1st_valid = ts_1st_valid
-        tick_wait_end = 1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        tick_wait_end = (
+            1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        )
 
         if cin * in_h * kw * valid_interval > 18432:
-            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+            raise ResourceError(
+                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
+            )
 
         n_delays = NodeList()
         s_delays = NodeList()
@@ -1348,7 +1360,11 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, input_valid: int, **build_options
+        self,
+        network: DynSysGroup,
+        valid_interval: int,
+        input_valid: int,
+        **build_options,
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         # if len(self.module_intf.operands[0].shape_out) != 2:
@@ -1362,7 +1378,9 @@ def build(
         cin = in_ch
         kh, kw = self.kernel_size
         if cin * in_h * kw * valid_interval > 18432:
-            raise ResourceError(f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+            raise ResourceError(
+                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
+            )
 
         # NOTE: Division is achieved with the help of truncation operation.
         # It can only be approximated to a power of an integer of 2.
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 5ee91e8e..c2fc4bcb 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -1,3 +1,4 @@
+import sys
 from typing import Optional
 
 import numpy as np
@@ -9,8 +10,6 @@
 from .base import Neuron
 from .utils import LEAK_V_MAX
 
-import sys
-
 if sys.version_info >= (3, 13):
     from typing import deprecated
 else:
diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index 1f6f60f0..7459cce0 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,2 +1,7 @@
-from .base import Conv2dSemiFoldedSyn, FullConnectedSyn, FullConnSyn, MaxPool2dSemiFoldedSyn
+from .base import (
+    Conv2dSemiFoldedSyn,
+    FullConnectedSyn,
+    FullConnSyn,
+    MaxPool2dSemiFoldedSyn,
+)
 from .transforms import ConnType
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 6297765f..847aa878 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -198,17 +198,17 @@ def _conv2d_semifolded_unroll(
         for j in range(cin):
             w_block = np.zeros((ih, oh), dtype=kernel.dtype)
             for k in range(oh):
-                w_block[
-                    k * stride[1] : k * stride[1] + kh, k
-                ] = kernel[i, j, :]
+                w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[i, j, :]
 
-            if padding[0] > 0:# H direction
+            if padding[0] > 0:  # H direction
                 w_block = np.delete(
                     w_block,
                     np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
                     axis=0,
                 )
-            w_np[j*in_shape[1]: (j+1)*in_shape[1], i*oh : (i+1)*oh] = w_block
+            w_np[j * in_shape[1] : (j + 1) * in_shape[1], i * oh : (i + 1) * oh] = (
+                w_block
+            )
     return w_np
 
 
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index e3e25ece..df9c694a 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -22,8 +22,8 @@
     _conv1d_faster,
     _conv1d_unroll,
     _conv2d_faster,
-    _conv2d_unroll,
     _conv2d_semifolded_unroll,
+    _conv2d_unroll,
     _convtranspose1d_faster,
     _convtranspose1d_unroll,
     _convtranspose2d_faster,
diff --git a/paibox/network.py b/paibox/network.py
index f3fc6da4..3d2a9eae 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -1,5 +1,6 @@
-from typing import Optional, Union
 import sys
+from typing import Optional, Union
+
 import numpy as np
 
 from .base import DynamicSys, SynSys
@@ -9,7 +10,6 @@
 from .mixin import Container
 from .node import NodeDict, NodeList
 
-
 if sys.version_info >= (3, 10):
     from typing import TypeAlias
 else:
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 2526536c..af024efa 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -5,14 +5,14 @@
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
 from paibox.components.neuron.base import MetaNeuron
-from paibox.components.synapses.conv_utils import _pair, _single, _conv2d_faster
+from paibox.components.synapses.conv_utils import _conv2d_faster, _pair, _single
 from paibox.network import DynSysGroup
 from paibox.types import (
+    NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
     WEIGHT_DTYPE,
     NeuOutType,
     VoltageType,
-    NEUOUT_U8_DTYPE,
 )
 from paibox.utils import as_shape, shape2num, typical_round
 
@@ -845,14 +845,27 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
             ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10),
             # n_conv = 2
             ((1, 5, 5), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10),
-            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (2, 2)], [1, 1], 10),
-            ((4, 32, 32), 2, [(8, 4, 3, 3), (12, 8, 4, 4)], [(2, 2), (1, 1)], [1, 2], 10),
+            (
+                (4, 32, 32),
+                2,
+                [(8, 4, 3, 3), (12, 8, 4, 4)],
+                [(2, 2), (2, 2)],
+                [1, 1],
+                10,
+            ),
+            (
+                (4, 32, 32),
+                2,
+                [(8, 4, 3, 3), (12, 8, 4, 4)],
+                [(2, 2), (1, 1)],
+                [1, 2],
+                10,
+            ),
             ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
             ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [2, 2], 10),
             ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], [2, 2], 10),
             ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], [2, 1], 10),
             ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
-
             # n_conv = 3
             (
                 (4, 32, 32),
@@ -871,29 +884,29 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 10,
             ),
             (
-                    (1, 224, 224),
-                    3,
-                    [(1, 1, 7, 7), (1, 1, 5, 5), (1, 1, 3, 3)],
-                    [2, 2, 2],
-                    [3, 2, 1],
-                    10,
+                (1, 224, 224),
+                3,
+                [(1, 1, 7, 7), (1, 1, 5, 5), (1, 1, 3, 3)],
+                [2, 2, 2],
+                [3, 2, 1],
+                10,
             ),
             (
-                    (3, 32, 32),
-                    3,
-                    [(3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3)],
-                    [1, 2, 1],
-                    [1, 0, 1],
-                    10,
+                (3, 32, 32),
+                3,
+                [(3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3)],
+                [1, 2, 1],
+                [1, 0, 1],
+                10,
             ),
             # n_conv = 5
             (
-                    (3, 32, 32),
-                    5,
-                    [(3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3)],
-                    [1, 2, 1, 2, 1],
-                    [1, 0, 1, 0, 1],
-                    10,
+                (3, 32, 32),
+                5,
+                [(3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3), (3, 3, 2, 2), (3, 3, 3, 3)],
+                [1, 2, 1, 2, 1],
+                [1, 0, 1, 0, 1],
+                10,
             ),
         ],
     )
@@ -972,7 +985,9 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         ts_1st_valid = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
-                ts_1st_valid[i] = (kshape_oihw[0][-1] - padding[0]) * semi_valid_interval[0]
+                ts_1st_valid[i] = (
+                    kshape_oihw[0][-1] - padding[0]
+                ) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
                     ts_1st_valid[i - 1]
@@ -985,7 +1000,12 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             inpa = fixed_rng.integers(0, 4, size=ishape_chw, dtype=NEUOUT_U8_DTYPE)
             if inpa.shape[-1] < 10:
                 inp_pad0 = np.concatenate(
-                    [inpa, np.zeros((inpa.shape[0], inpa.shape[1], 15), dtype=inpa.dtype)], axis=2, dtype=inpa.dtype
+                    [
+                        inpa,
+                        np.zeros((inpa.shape[0], inpa.shape[1], 15), dtype=inpa.dtype),
+                    ],
+                    axis=2,
+                    dtype=inpa.dtype,
                 )
             else:
                 inp_pad0 = np.concatenate(
@@ -1008,7 +1028,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                     )
                 )
 
-                #Check the result of semi-folded convolutions.
+                # Check the result of semi-folded convolutions.
                 for i in range(ows[i_conv]):
                     assert np.array_equal(
                         x[:, :, i].ravel(),
diff --git a/tests/utils.py b/tests/utils.py
index 1645d446..f18ecd5a 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -2,7 +2,6 @@
 from contextlib import contextmanager
 from typing import Any, Generator
 
-
 __all__ = ["measure_time"]
 
 

From aecf57effcdafcd69f141487f8aeea3a41dd4028 Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Wed, 25 Sep 2024 16:05:10 +0800
Subject: [PATCH 080/187] support avgpool padding

---
 paibox/components/functional.py     |  85 ++++++++++---
 tests/components/test_functional.py | 178 ++++++++++++++++------------
 tests/shared_networks.py            |  16 ++-
 3 files changed, 185 insertions(+), 94 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 147ff927..b15bcbb0 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1,3 +1,4 @@
+import math
 import sys
 from collections.abc import Sequence
 from functools import partial
@@ -1087,7 +1088,7 @@ def build(
                     name=f"neg_s{i}_{self.name}",
                 )
                 s_kernel.append(syn2)
-        generated = [n_conv2d, *n_delays, *s_delays, *s_kernel]
+        generated = [n_conv2d, *n_delays, *n_copies, *s_delays, *s_kernel]
         self._rebuild_out_intf(network, n_conv2d, *generated, **build_options)
 
         return generated
@@ -1209,7 +1210,7 @@ def __init__(
         neuron_s: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
-        # padding: _Size2Type = 0,
+        padding: _Size2Type = 0,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
@@ -1222,13 +1223,13 @@ def __init__(
             _stride = _pair(stride)
 
         self.stride = _stride
-        # self.padding = _pair(padding)
+        self.padding = _pair(padding)
         # self._w_padding_check(self.padding[1], neuron_s)
 
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
 
-        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
+        out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
 
         super().__init__(
             neuron_s,
@@ -1307,7 +1308,7 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping(
-                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride
+                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride, self.padding
                 ),
                 name=f"s{i}_{self.name}",
             )
@@ -1327,7 +1328,7 @@ def __init__(
         neuron_s: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
-        # padding: _Size2Type = 0,
+        padding: _Size2Type = 0,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
@@ -1340,13 +1341,13 @@ def __init__(
             _stride = _pair(stride)
 
         self.stride = _stride
-        # self.padding = _pair(padding)
+        self.padding = _pair(padding)
         # self._w_padding_check(self.padding[1], neuron_s)
 
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
 
-        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
+        out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
 
         super().__init__(
             neuron_s,
@@ -1382,19 +1383,35 @@ def build(
                 f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
             )
 
+        ts_1st_valid = (
+                input_valid
+                + (kw - 1 - self.padding[0]) * valid_interval
+        )
+        self.ts_1st_valid = ts_1st_valid
+        tick_wait_end = 1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+
+        E = math.ceil(math.log2(cin * in_h * kw / 144))
+        E = 0 if E < 0 else E
+        if kw * valid_interval > 256 / (2 ** E):
+            raise ResourceError(
+                f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
+
+
         # NOTE: Division is achieved with the help of truncation operation.
         # It can only be approximated to a power of an integer of 2.
         bit_trunc = 8 + (kh * kw).bit_length() - 1
 
         n_delays = NodeList()
+        n_copies = NodeList()
         s_delays = NodeList()
+        s_kernel = NodeList()
 
         pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             bit_trunc=bit_trunc,
             tick_wait_start=self.tick_wait_start + 1,
-            tick_wait_end=self.tick_wait_end,
+            tick_wait_end=tick_wait_end,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
@@ -1403,7 +1420,7 @@ def build(
                 (cin, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=self.tick_wait_end,
+                tick_wait_end=tick_wait_end,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1421,13 +1438,43 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping(
-                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride
+                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride, self.padding
                 ),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
             s_delays.append(syn2)
+        if input_valid > 0:
+            for i in range(self.padding[0]):
+                neuron = ANNBypassNeuron(
+                    (cin, in_h),
+                    delay=valid_interval * (kw-1-i) + 1,
+                    tick_wait_start=self.tick_wait_start,
+                    tick_wait_end=input_valid,
+                    keep_shape=self.keep_shape,
+                    name=f"n{i}_copy_{self.name}",
+                )
 
+                n_copies.append(neuron)
+                # delay synapses
+                syn1 = FullConnSyn(
+                    self.module_intf.operands[0],
+                    n_copies[i],
+                    weights=_delay_mapping(in_h, cin),
+                    conn_type=ConnType.All2All,
+                    name=f"s{i}_copy_{self.name}",
+                )
+                s_delays.append(syn1)
+
+                syn2 = FullConnSyn(  # cin, ih -> cout * oh
+                    n_copies[i],
+                    pool2d,
+                    weights=-(_poo2d_semifolded_mapping(
+                        cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride, self.padding)),
+                    conn_type=ConnType.All2All,
+                    name=f"neg_s{i}_{self.name}",
+                )
+                s_kernel.append(syn2)
         generated = [pool2d, *n_delays, *s_delays]
         self._rebuild_out_intf(network, pool2d, *generated, **build_options)
 
@@ -1545,13 +1592,23 @@ def _delay_mapping(h: int, cin: int) -> WeightType:
 
 
 def _poo2d_semifolded_mapping(
-    cin: int, ih: int, oh: int, kh: int, stride: tuple[int, int]
+    cin: int, ih: int, oh: int, kh: int, stride: tuple[int, int], padding: tuple[int, int]
 ) -> WeightType:
     cout = cin
+
     m = np.zeros((cin * ih, cout * oh), dtype=WEIGHT_DTYPE)
+    m_block = np.zeros((ih+2*padding[0], oh), dtype=WEIGHT_DTYPE)
+
+    for j in range(oh):
+        m_block[j * stride[1] : j * stride[1] + kh, j] =1
+    if padding[0] > 0:
+        m_block = np.delete(
+            m_block,
+            np.hstack((np.arange(padding[0]), np.arange(ih + padding[0], ih+2*padding[0]))),
+            axis=0,
+        )
 
     for i in range(cout):
-        for j in range(oh):
-            m[i * ih + j * stride[1] : i * ih + j * stride[1] + kh, i * oh + j] = 1
+        m[i*ih: i*ih+ih, i*oh:i*oh+oh] = m_block
 
     return m
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index af024efa..36d60e81 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -25,7 +25,7 @@
 
 
 def _assert_build_fmodule(
-    network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
+        network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
 ):
     nodes = network.nodes().subset(DynamicSys).unique()
     assert len(nodes) == n_node_bef_build
@@ -427,16 +427,16 @@ def test_SpikingSub_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingPool1d(
-        self,
-        shape,
-        channels,
-        ksize,
-        stride,
-        padding,
-        threshold,
-        fm_order,
-        pool_type,
-        p_binomial,
+            self,
+            shape,
+            channels,
+            ksize,
+            stride,
+            padding,
+            threshold,
+            fm_order,
+            pool_type,
+            p_binomial,
     ):
         from tests.shared_networks import SpikingPool1d_Net
 
@@ -537,16 +537,16 @@ def test_SpikingPool1d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingPool2d(
-        self,
-        shape,
-        channels,
-        ksize,
-        stride,
-        padding,
-        threshold,
-        fm_order,
-        pool_type,
-        p_binomial,
+            self,
+            shape,
+            channels,
+            ksize,
+            stride,
+            padding,
+            threshold,
+            fm_order,
+            pool_type,
+            p_binomial,
     ):
         from tests.shared_networks import SpikingPool2d_Net
 
@@ -631,7 +631,7 @@ def test_SpikingPool2d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingAvgPool1dWithV(
-        self, shape, channels, ksize, stride, padding, threshold, p_binomial
+            self, shape, channels, ksize, stride, padding, threshold, p_binomial
     ):
         """NOTE: This function is a native implementation of SNNs and is therefore not  \
             compared to the ANN implementation."""
@@ -686,14 +686,14 @@ def test_SpikingAvgPool1dWithV_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingAvgPool2dWithV(
-        self,
-        shape,
-        channels,
-        ksize,
-        stride,
-        padding,
-        threshold,
-        p_binomial,
+            self,
+            shape,
+            channels,
+            ksize,
+            stride,
+            padding,
+            threshold,
+            p_binomial,
     ):
         """NOTE: This function is a native implementation of SNNs and is therefore not  \
             compared to the ANN implementation."""
@@ -868,20 +868,20 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
             ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
             # n_conv = 3
             (
-                (4, 32, 32),
-                3,
-                [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
-                [1, 1, 1],
-                [1, 1, 1],
-                3,
+                    (4, 32, 32),
+                    3,
+                    [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
+                    [1, 1, 1],
+                    [1, 1, 1],
+                    3,
             ),
             (
-                (3, 32, 32),
-                3,
-                [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
-                [1, 1, 1],
-                [1, 0, 1],
-                10,
+                    (3, 32, 32),
+                    3,
+                    [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
+                    [1, 1, 1],
+                    [1, 0, 1],
+                    10,
             ),
             (
                 (1, 224, 224),
@@ -911,14 +911,14 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_Conv2dSemiFolded_FC_ChainNet(
-        self,
-        ishape_chw,
-        n_conv,
-        kshape_oihw,
-        stride,
-        padding,
-        out_features,
-        fixed_rng,
+            self,
+            ishape_chw,
+            n_conv,
+            kshape_oihw,
+            stride,
+            padding,
+            out_features,
+            fixed_rng: np.random.Generator,
     ):
         """Test the network with N semi-folded conv2d + 1 semi-folded linear."""
         from tests.shared_networks import Conv2dSemiFolded_FC_ChainNetN
@@ -990,8 +990,8 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                 ) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
-                    ts_1st_valid[i - 1]
-                    + (kshape_oihw[i][-1] - 1 - padding[i]) * semi_valid_interval[i]
+                        ts_1st_valid[i - 1]
+                        + (kshape_oihw[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
                 )
 
         n_test = 3  # can be more
@@ -1037,7 +1037,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                             + ts_1st_valid[i_conv]
                             + i * semi_valid_interval[i_conv + 1]
                             - 1
-                        ],
+                            ],
                     )
 
             # x is the reference result of the last convolution.
@@ -1051,52 +1051,76 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                     + ts_1st_valid[-1]
                     + (ows[-1] - 1) * semi_valid_interval[-1]
                     - 1
-                ],
+                    ],
             )
 
     @pytest.mark.parametrize(
-        "ishape_chw, n_pool, kshape_hw, stride, out_features, pool_type",
-        [
-            # NOTE: the first layer is not likely to be a pooling layer. So we
-            # don't support padding for pooling layers.
+        "ishape_chw, n_pool, kshape_hw, stride, padding, out_features, pool_type",
+        [   # n_pool = 1
+            ((3, 16, 16), 1, [2], [2], [1], (10,), "avg"),
             # n_pool = 2
-            ((3, 24, 24), 2, [2, 2], [1, 1], (2, 2), "avg"),
-            ((3, 24, 24), 2, [(2, 2), (2, 2)], [None, None], (10,), "avg"),
-            ((6, 32, 32), 2, [3, 3], [None, None], (10,), "avg"),
-            ((3, 24, 24), 2, [2, 2], [1, 1], (4,), "max"),
-            ((3, 24, 24), 2, [(2, 2), (2, 2)], [2, 2], (10,), "max"),
-            ((6, 32, 32), 2, [3, 3], [None, None], (10,), "max"),
+            ((3, 24, 24), 2, [2, 2], [1, 1], [0, 0], (2, 2), "avg"),
+            (
+                    (3, 24, 24),
+                    2,
+                    [(2, 2), (2, 2)],
+                    [None, None],
+                    [1, 1],
+                    (10,),
+                    "avg",
+            ),
+            ((4, 32, 32), 2, [3, 3], [1, 1], [(0, 0), (1, 1)], (10,), "avg"),
+            ((1, 8, 8), 2, [3, 3], [1, 1], [(0, 0), (1, 1)], (10,), "avg"),
+            ((3, 24, 24), 2, [2, 2], [1, 1], [], (4,), "max"),
+            ((3, 24, 24), 2, [(2, 2), (2, 2)], [2, 2], [], (10,), "max"),
+            ((6, 32, 32), 2, [3, 3], [None, None], [], (10,), "max"),
             # n_pool = 3
-            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], (10,), "avg"),
-            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], (10,), "max"),
+            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], [(1, 1), (0, 0), (1, 1)], (10,), "avg"),
+            ((3, 48, 48), 3, [3, 3, 3], [2, 2, 2], [(2, 2), (0, 0), (1, 1)], (10,), "avg"),
+            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], [], (10,), "max"),
+
         ],
     )
     def test_Pool2dSemiFolded_FC_ChainNet(
-        self, ishape_chw, n_pool, kshape_hw, stride, out_features, pool_type, fixed_rng
+            self,
+            ishape_chw,
+            n_pool,
+            kshape_hw,
+            stride,
+            padding,
+            out_features,
+            pool_type,
+            fixed_rng: np.random.Generator,
     ):
+        """Test the network with N semi-folded pool2d + 1 semi-folded linear."""
         from tests.shared_networks import Pool2dSemiFolded_FC_ChainNetN
 
+        if pool_type == "max":
+            padding = [(0, 0)] * n_pool
+
         assert n_pool == len(kshape_hw) == len(stride)
         ksizes = []
         strides = []
-        paddings = [(0, 0) for _ in range(n_pool)]
+        paddings = []
         ocs = []
         ohs = []
         ows = []
 
         for i_pool in range(n_pool):
-            k, s = kshape_hw[i_pool], stride[i_pool]
+            k, s, p = (kshape_hw[i_pool], stride[i_pool], padding[i_pool])
 
             _ksize = _pair(k)
             _stride = _pair(s) if s is not None else _ksize
+            _padding = _pair(p)
             ksizes.append(_ksize)
             strides.append(_stride)
+            paddings.append(_padding)
 
             ih = ishape_chw[1] if i_pool == 0 else ohs[-1]
             iw = ishape_chw[2] if i_pool == 0 else ows[-1]
             oc = ishape_chw[0]
-            oh = (ih - _ksize[0]) // _stride[0] + 1
-            ow = (iw - _ksize[1]) // _stride[1] + 1
+            oh = (ih - _ksize[0] + 2 * paddings[i_pool][0]) // _stride[0] + 1
+            ow = (iw - _ksize[1] + 2 * paddings[i_pool][0]) // _stride[1] + 1
             ocs.append(oc)
             ohs.append(oh)
             ows.append(ow)
@@ -1140,10 +1164,11 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         ts_1st_valid = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
-                ts_1st_valid[i] = ksizes[0][-1] * semi_valid_interval[0]
+                ts_1st_valid[i] = (ksizes[0][-1] - paddings[0][0]) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
-                    ts_1st_valid[i - 1] + (ksizes[i][-1] - 1) * semi_valid_interval[i]
+                        ts_1st_valid[i - 1]
+                        + (ksizes[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
                 )
 
         n_test = 3  # can be more
@@ -1177,11 +1202,12 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                             + ts_1st_valid[i_pool]
                             + i * semi_valid_interval[i_pool + 1]
                             - 1
-                        ],
+                            ],
                     )
 
             # x is the reference result of the last pooling.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
+
             # Check the result of semi-folded linear.
             assert np.array_equal(
                 expected_fc_t,
@@ -1190,7 +1216,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                     + ts_1st_valid[-1]
                     + (ows[-1] - 1) * semi_valid_interval[-1]
                     - 1
-                ],
+                    ],
             )
 
     @pytest.mark.parametrize(
@@ -1221,4 +1247,4 @@ def test_Linear(self, shape, weight):
             sim2.run(1)
 
         for i in range(N_TEST):
-            assert np.array_equal(sim1.data[net1.probe1][i], sim2.data[probe_linear][i])
+            assert np.array_equal(sim1.data[net1.probe1][i], sim2.data[probe_linear][i])
\ No newline at end of file
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 19a00dce..fab4942c 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -290,21 +290,29 @@ def __init__(self, shape, kernels, strides, paddings, out_features, weight):
 
 class Pool2dSemiFolded_FC_ChainNetN(pb.DynSysGroup):
     def __init__(
-        self, shape, kernel_sizes, strides, paddings, out_features, weight, pool_type
+            self, shape, kernel_sizes, strides, paddings, out_features, weight, pool_type
     ):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
         self.pool_list = NodeList()
 
         for i, (ksize, stride) in enumerate(zip(kernel_sizes, strides)):
-            self.pool_list.append(
-                _pool_semi_op[pool_type](
+            if pool_type == "max":
+                pool = _pool_semi_op[pool_type](
                     self.pool_list[-1] if i > 0 else self.i1,
                     ksize,
                     stride,
                     tick_wait_start=1 + 2 * i,
                 )
-            )
+            else:
+                pool = _pool_semi_op[pool_type](
+                    self.pool_list[-1] if i > 0 else self.i1,
+                    ksize,
+                    stride,
+                    padding=paddings[i],
+                    tick_wait_start=1 + 2 * i,
+                )
+            self.pool_list.append(pool)
 
         self.linear1 = pb.LinearSemiFolded(
             self.pool_list[-1],

From 4f50d04e42c085fece42405e4c680d7a66d55888 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Sep 2024 01:50:34 +0000
Subject: [PATCH 081/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/functional.py     |  60 +++++++---
 tests/components/test_functional.py | 177 +++++++++++++++-------------
 tests/shared_networks.py            |   2 +-
 3 files changed, 140 insertions(+), 99 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index b15bcbb0..e0e84263 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1308,7 +1308,12 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping(
-                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride, self.padding
+                    cin,
+                    in_h,
+                    self.shape_out[1],
+                    self.kernel_size[0],
+                    self.stride,
+                    self.padding,
                 ),
                 name=f"s{i}_{self.name}",
             )
@@ -1383,19 +1388,18 @@ def build(
                 f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
             )
 
-        ts_1st_valid = (
-                input_valid
-                + (kw - 1 - self.padding[0]) * valid_interval
-        )
+        ts_1st_valid = input_valid + (kw - 1 - self.padding[0]) * valid_interval
         self.ts_1st_valid = ts_1st_valid
-        tick_wait_end = 1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        tick_wait_end = (
+            1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        )
 
         E = math.ceil(math.log2(cin * in_h * kw / 144))
         E = 0 if E < 0 else E
-        if kw * valid_interval > 256 / (2 ** E):
+        if kw * valid_interval > 256 / (2**E):
             raise ResourceError(
-                f"The {self.name} input size is too large. Please adjust the input size or the number of channels.")
-
+                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
+            )
 
         # NOTE: Division is achieved with the help of truncation operation.
         # It can only be approximated to a power of an integer of 2.
@@ -1438,7 +1442,12 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping(
-                    cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride, self.padding
+                    cin,
+                    in_h,
+                    self.shape_out[1],
+                    self.kernel_size[0],
+                    self.stride,
+                    self.padding,
                 ),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
@@ -1448,7 +1457,7 @@ def build(
             for i in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, in_h),
-                    delay=valid_interval * (kw-1-i) + 1,
+                    delay=valid_interval * (kw - 1 - i) + 1,
                     tick_wait_start=self.tick_wait_start,
                     tick_wait_end=input_valid,
                     keep_shape=self.keep_shape,
@@ -1469,8 +1478,16 @@ def build(
                 syn2 = FullConnSyn(  # cin, ih -> cout * oh
                     n_copies[i],
                     pool2d,
-                    weights=-(_poo2d_semifolded_mapping(
-                        cin, in_h, self.shape_out[1], self.kernel_size[0], self.stride, self.padding)),
+                    weights=-(
+                        _poo2d_semifolded_mapping(
+                            cin,
+                            in_h,
+                            self.shape_out[1],
+                            self.kernel_size[0],
+                            self.stride,
+                            self.padding,
+                        )
+                    ),
                     conn_type=ConnType.All2All,
                     name=f"neg_s{i}_{self.name}",
                 )
@@ -1592,23 +1609,30 @@ def _delay_mapping(h: int, cin: int) -> WeightType:
 
 
 def _poo2d_semifolded_mapping(
-    cin: int, ih: int, oh: int, kh: int, stride: tuple[int, int], padding: tuple[int, int]
+    cin: int,
+    ih: int,
+    oh: int,
+    kh: int,
+    stride: tuple[int, int],
+    padding: tuple[int, int],
 ) -> WeightType:
     cout = cin
 
     m = np.zeros((cin * ih, cout * oh), dtype=WEIGHT_DTYPE)
-    m_block = np.zeros((ih+2*padding[0], oh), dtype=WEIGHT_DTYPE)
+    m_block = np.zeros((ih + 2 * padding[0], oh), dtype=WEIGHT_DTYPE)
 
     for j in range(oh):
-        m_block[j * stride[1] : j * stride[1] + kh, j] =1
+        m_block[j * stride[1] : j * stride[1] + kh, j] = 1
     if padding[0] > 0:
         m_block = np.delete(
             m_block,
-            np.hstack((np.arange(padding[0]), np.arange(ih + padding[0], ih+2*padding[0]))),
+            np.hstack(
+                (np.arange(padding[0]), np.arange(ih + padding[0], ih + 2 * padding[0]))
+            ),
             axis=0,
         )
 
     for i in range(cout):
-        m[i*ih: i*ih+ih, i*oh:i*oh+oh] = m_block
+        m[i * ih : i * ih + ih, i * oh : i * oh + oh] = m_block
 
     return m
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 36d60e81..e7c4b1fa 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -25,7 +25,7 @@
 
 
 def _assert_build_fmodule(
-        network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
+    network: DynSysGroup, n_node_bef_build: int, n_node_aft_build: int
 ):
     nodes = network.nodes().subset(DynamicSys).unique()
     assert len(nodes) == n_node_bef_build
@@ -427,16 +427,16 @@ def test_SpikingSub_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingPool1d(
-            self,
-            shape,
-            channels,
-            ksize,
-            stride,
-            padding,
-            threshold,
-            fm_order,
-            pool_type,
-            p_binomial,
+        self,
+        shape,
+        channels,
+        ksize,
+        stride,
+        padding,
+        threshold,
+        fm_order,
+        pool_type,
+        p_binomial,
     ):
         from tests.shared_networks import SpikingPool1d_Net
 
@@ -537,16 +537,16 @@ def test_SpikingPool1d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingPool2d(
-            self,
-            shape,
-            channels,
-            ksize,
-            stride,
-            padding,
-            threshold,
-            fm_order,
-            pool_type,
-            p_binomial,
+        self,
+        shape,
+        channels,
+        ksize,
+        stride,
+        padding,
+        threshold,
+        fm_order,
+        pool_type,
+        p_binomial,
     ):
         from tests.shared_networks import SpikingPool2d_Net
 
@@ -631,7 +631,7 @@ def test_SpikingPool2d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingAvgPool1dWithV(
-            self, shape, channels, ksize, stride, padding, threshold, p_binomial
+        self, shape, channels, ksize, stride, padding, threshold, p_binomial
     ):
         """NOTE: This function is a native implementation of SNNs and is therefore not  \
             compared to the ANN implementation."""
@@ -686,14 +686,14 @@ def test_SpikingAvgPool1dWithV_mapping(self, ensure_dump_dir):
         ],
     )
     def test_SpikingAvgPool2dWithV(
-            self,
-            shape,
-            channels,
-            ksize,
-            stride,
-            padding,
-            threshold,
-            p_binomial,
+        self,
+        shape,
+        channels,
+        ksize,
+        stride,
+        padding,
+        threshold,
+        p_binomial,
     ):
         """NOTE: This function is a native implementation of SNNs and is therefore not  \
             compared to the ANN implementation."""
@@ -868,20 +868,20 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
             ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
             # n_conv = 3
             (
-                    (4, 32, 32),
-                    3,
-                    [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
-                    [1, 1, 1],
-                    [1, 1, 1],
-                    3,
+                (4, 32, 32),
+                3,
+                [(8, 4, 3, 3), (16, 8, 3, 3), (8, 16, 2, 2)],
+                [1, 1, 1],
+                [1, 1, 1],
+                3,
             ),
             (
-                    (3, 32, 32),
-                    3,
-                    [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
-                    [1, 1, 1],
-                    [1, 0, 1],
-                    10,
+                (3, 32, 32),
+                3,
+                [(16, 3, 3, 3), (32, 16, 3, 3), (10, 32, 3, 3)],
+                [1, 1, 1],
+                [1, 0, 1],
+                10,
             ),
             (
                 (1, 224, 224),
@@ -911,14 +911,14 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         ],
     )
     def test_Conv2dSemiFolded_FC_ChainNet(
-            self,
-            ishape_chw,
-            n_conv,
-            kshape_oihw,
-            stride,
-            padding,
-            out_features,
-            fixed_rng: np.random.Generator,
+        self,
+        ishape_chw,
+        n_conv,
+        kshape_oihw,
+        stride,
+        padding,
+        out_features,
+        fixed_rng: np.random.Generator,
     ):
         """Test the network with N semi-folded conv2d + 1 semi-folded linear."""
         from tests.shared_networks import Conv2dSemiFolded_FC_ChainNetN
@@ -990,8 +990,8 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                 ) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
-                        ts_1st_valid[i - 1]
-                        + (kshape_oihw[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
+                    ts_1st_valid[i - 1]
+                    + (kshape_oihw[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
                 )
 
         n_test = 3  # can be more
@@ -1037,7 +1037,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                             + ts_1st_valid[i_conv]
                             + i * semi_valid_interval[i_conv + 1]
                             - 1
-                            ],
+                        ],
                     )
 
             # x is the reference result of the last convolution.
@@ -1051,23 +1051,23 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                     + ts_1st_valid[-1]
                     + (ows[-1] - 1) * semi_valid_interval[-1]
                     - 1
-                    ],
+                ],
             )
 
     @pytest.mark.parametrize(
         "ishape_chw, n_pool, kshape_hw, stride, padding, out_features, pool_type",
-        [   # n_pool = 1
+        [  # n_pool = 1
             ((3, 16, 16), 1, [2], [2], [1], (10,), "avg"),
             # n_pool = 2
             ((3, 24, 24), 2, [2, 2], [1, 1], [0, 0], (2, 2), "avg"),
             (
-                    (3, 24, 24),
-                    2,
-                    [(2, 2), (2, 2)],
-                    [None, None],
-                    [1, 1],
-                    (10,),
-                    "avg",
+                (3, 24, 24),
+                2,
+                [(2, 2), (2, 2)],
+                [None, None],
+                [1, 1],
+                (10,),
+                "avg",
             ),
             ((4, 32, 32), 2, [3, 3], [1, 1], [(0, 0), (1, 1)], (10,), "avg"),
             ((1, 8, 8), 2, [3, 3], [1, 1], [(0, 0), (1, 1)], (10,), "avg"),
@@ -1075,22 +1075,37 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             ((3, 24, 24), 2, [(2, 2), (2, 2)], [2, 2], [], (10,), "max"),
             ((6, 32, 32), 2, [3, 3], [None, None], [], (10,), "max"),
             # n_pool = 3
-            ((3, 48, 48), 3, [3, 2, 2], [None, None, None], [(1, 1), (0, 0), (1, 1)], (10,), "avg"),
-            ((3, 48, 48), 3, [3, 3, 3], [2, 2, 2], [(2, 2), (0, 0), (1, 1)], (10,), "avg"),
+            (
+                (3, 48, 48),
+                3,
+                [3, 2, 2],
+                [None, None, None],
+                [(1, 1), (0, 0), (1, 1)],
+                (10,),
+                "avg",
+            ),
+            (
+                (3, 48, 48),
+                3,
+                [3, 3, 3],
+                [2, 2, 2],
+                [(2, 2), (0, 0), (1, 1)],
+                (10,),
+                "avg",
+            ),
             ((3, 48, 48), 3, [3, 2, 2], [None, None, None], [], (10,), "max"),
-
         ],
     )
     def test_Pool2dSemiFolded_FC_ChainNet(
-            self,
-            ishape_chw,
-            n_pool,
-            kshape_hw,
-            stride,
-            padding,
-            out_features,
-            pool_type,
-            fixed_rng: np.random.Generator,
+        self,
+        ishape_chw,
+        n_pool,
+        kshape_hw,
+        stride,
+        padding,
+        out_features,
+        pool_type,
+        fixed_rng: np.random.Generator,
     ):
         """Test the network with N semi-folded pool2d + 1 semi-folded linear."""
         from tests.shared_networks import Pool2dSemiFolded_FC_ChainNetN
@@ -1164,11 +1179,13 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         ts_1st_valid = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
-                ts_1st_valid[i] = (ksizes[0][-1] - paddings[0][0]) * semi_valid_interval[0]
+                ts_1st_valid[i] = (
+                    ksizes[0][-1] - paddings[0][0]
+                ) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
-                        ts_1st_valid[i - 1]
-                        + (ksizes[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
+                    ts_1st_valid[i - 1]
+                    + (ksizes[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
                 )
 
         n_test = 3  # can be more
@@ -1202,7 +1219,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                             + ts_1st_valid[i_pool]
                             + i * semi_valid_interval[i_pool + 1]
                             - 1
-                            ],
+                        ],
                     )
 
             # x is the reference result of the last pooling.
@@ -1216,7 +1233,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                     + ts_1st_valid[-1]
                     + (ows[-1] - 1) * semi_valid_interval[-1]
                     - 1
-                    ],
+                ],
             )
 
     @pytest.mark.parametrize(
@@ -1247,4 +1264,4 @@ def test_Linear(self, shape, weight):
             sim2.run(1)
 
         for i in range(N_TEST):
-            assert np.array_equal(sim1.data[net1.probe1][i], sim2.data[probe_linear][i])
\ No newline at end of file
+            assert np.array_equal(sim1.data[net1.probe1][i], sim2.data[probe_linear][i])
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index fab4942c..f75a7686 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -290,7 +290,7 @@ def __init__(self, shape, kernels, strides, paddings, out_features, weight):
 
 class Pool2dSemiFolded_FC_ChainNetN(pb.DynSysGroup):
     def __init__(
-            self, shape, kernel_sizes, strides, paddings, out_features, weight, pool_type
+        self, shape, kernel_sizes, strides, paddings, out_features, weight, pool_type
     ):
         super().__init__()
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)

From a96b1f0f3995ff2234b7eac985d28ebfe0219cca Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 4 Oct 2024 18:31:01 +0800
Subject: [PATCH 082/187] =?UTF-8?q?=F0=9F=8E=A8=20improved=20format=20for?=
 =?UTF-8?q?=20semi-folded=20ops=20&=20conv=20utils?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py            |  24 ++--
 paibox/components/functional.py          | 173 +++++++++++------------
 paibox/components/synapses/conv_utils.py |  26 ++--
 paibox/network.py                        |   7 +-
 tests/components/test_functional.py      |   5 +-
 tests/components/utils.py                |  28 +---
 6 files changed, 116 insertions(+), 147 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 19c04d45..cbf392b4 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -4,7 +4,6 @@
 from paicorelib import TM
 
 from paibox.base import NeuDyn, NodeList
-from paibox.exceptions import NotSupportedError
 from paibox.network import DynSysGroup
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -156,10 +155,14 @@ class _DelayChainANN(_DelayChainBase):
 
 
 class _HasSemiFoldedIntf(Protocol):
-    """The front of this module has replication & delay interface for semi-folded convolution."""
+    """The front of this module has replication & delay interface for semi-folded operators."""
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, **build_options
+        self,
+        network: DynSysGroup,
+        valid_interval: int,
+        ts_first_valid_inp: int,
+        **build_options,
     ) -> BuiltComponentType: ...
 
 
@@ -167,19 +170,8 @@ def build(
 class _SemiFoldedModule(FunctionalModule, _HasSemiFoldedIntf):
     valid_interval: int = 1
     """The interval of valid output data"""
-    ts_1st_valid: int = 0
-
-    @staticmethod
-    def _w_padding_check(w_padding: int, prev_node: Union[NeuDyn, InputProj]) -> None:
-        # NOTE: Only support padding in the first semi-folded conv2d for now.
-        # In fact, it is rare for the H & W directions to be padded unequally.
-        # TODO Support H padding
-        # if w_padding > 0 and not isinstance(prev_node, InputProj):
-        #     raise NotSupportedError(
-        #         "only semi-folded convolutions that connect input projection "
-        #         "are supported to have padding in the W direction."
-        #     )
-        return
+    ts_1st_valid_out: int = 0
+    """The timestamp of the first valid output data"""
 
 
 class _LinearBase(FunctionalModule):
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index e0e84263..6726f28c 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -860,7 +860,7 @@ def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
 
 
 class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
-    "That operator is used on the first fully-connected layer after the semi-folded convolution."
+    "This operator is used on the first fully-connected layer after the semi-folded convolution."
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
@@ -951,8 +951,6 @@ def __init__(
         self.kernel = kernel
         self.stride = _pair(stride)
         self.padding = _pair(padding)
-        self._w_padding_check(self.padding[1], neuron_s)
-
         self.bit_trunc = bit_trunc
 
         if isinstance(bias, np.ndarray):
@@ -971,7 +969,7 @@ def __init__(
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
 
         if in_ch != cin:
-            raise ShapeError(f"The channels mismatch: {in_ch} != {cin}.")
+            raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
         super().__init__(
             neuron_s,
@@ -988,7 +986,7 @@ def build(
         self,
         network: DynSysGroup,
         valid_interval: int,
-        input_valid: int,
+        ts_first_valid_inp: int,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
@@ -1000,19 +998,26 @@ def build(
         self.valid_interval = valid_interval
         _, in_h = self.module_intf.operands[0].shape_out
         _, cin, _, kw = self.kernel.shape
-        ts_1st_valid = input_valid + (kw - 1 - self.padding[0]) * valid_interval
-        self.ts_1st_valid = ts_1st_valid
-        tick_wait_end = (
-            1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+
+        self.ts_1st_valid_out = (
+            ts_first_valid_inp + (kw - 1 - self.padding[0]) * valid_interval
+        )
+        twe = (
+            1
+            + self.ts_1st_valid_out
+            + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
         )
+
         if cin * in_h * kw * valid_interval > 18432:
             raise ResourceError(
                 f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
             )
+
         n_delays = NodeList()
-        n_copies = NodeList()
+        n_neg_padding = NodeList()
         s_delays = NodeList()
         s_kernel = NodeList()
+        s_neg_padding = NodeList()
 
         n_conv2d = ANNNeuron(
             self.shape_out,
@@ -1020,7 +1025,7 @@ def build(
             self.bit_trunc,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
-            tick_wait_end=tick_wait_end,
+            tick_wait_end=twe,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
@@ -1030,7 +1035,7 @@ def build(
                 (cin, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=tick_wait_end,
+                tick_wait_end=twe,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_delay_{self.name}",
             )
@@ -1038,14 +1043,14 @@ def build(
             # delay synapses
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
-                n_delays[i],
+                neuron,
                 weights=_delay_mapping(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
 
-            syn2 = Conv2dSemiFoldedSyn(  # cin, ih -> cout * oh
+            syn2 = Conv2dSemiFoldedSyn(
                 neuron,
                 n_conv2d,
                 self.kernel[:, :, :, kw - i - 1],
@@ -1056,39 +1061,50 @@ def build(
             )
             s_kernel.append(syn2)
 
-        if input_valid > 0:
-            for i in range(self.padding[0]):
+        # Extra negative padding layer
+        # NOTE: ts_first_valid_inp = 0 & padding[0] > 0 means the previous layer is
+        # an input node. No need to add negative padding layer for this case.
+        # TODO add technical details
+        if ts_first_valid_inp > 0:
+            for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, in_h),
-                    delay=valid_interval * (kw - 1 - i) + 1,
+                    delay=valid_interval * (kw - 1 - p) + 1,
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=input_valid,
+                    tick_wait_end=ts_first_valid_inp,
                     keep_shape=self.keep_shape,
-                    name=f"n{i}_copy_{self.name}",
+                    name=f"n{p}_pad_{self.name}",
                 )
-
-                n_copies.append(neuron)
+                n_neg_padding.append(neuron)
                 # delay synapses
                 syn1 = FullConnSyn(
                     self.module_intf.operands[0],
-                    n_copies[i],
+                    neuron,
                     weights=_delay_mapping(in_h, cin),
                     conn_type=ConnType.All2All,
-                    name=f"s{i}_copy_{self.name}",
+                    name=f"s{p}_pad_{self.name}",
                 )
                 s_delays.append(syn1)
 
-                syn2 = Conv2dSemiFoldedSyn(  # cin, ih -> cout * oh
-                    n_copies[i],
+                syn2 = Conv2dSemiFoldedSyn(
+                    neuron,
                     n_conv2d,
-                    -(self.kernel[:, :, :, i]),
+                    -(self.kernel[:, :, :, p]),
                     self.stride,
                     self.padding,
                     "OIL",
-                    name=f"neg_s{i}_{self.name}",
+                    name=f"neg_s{p}_{self.name}",
                 )
-                s_kernel.append(syn2)
-        generated = [n_conv2d, *n_delays, *n_copies, *s_delays, *s_kernel]
+                s_neg_padding.append(syn2)
+
+        generated = [
+            n_conv2d,
+            *n_delays,
+            *n_neg_padding,
+            *s_delays,
+            *s_kernel,
+            *s_neg_padding,
+        ]
         self._rebuild_out_intf(network, n_conv2d, *generated, **build_options)
 
         return generated
@@ -1210,7 +1226,6 @@ def __init__(
         neuron_s: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
-        padding: _Size2Type = 0,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
@@ -1223,13 +1238,10 @@ def __init__(
             _stride = _pair(stride)
 
         self.stride = _stride
-        self.padding = _pair(padding)
-        # self._w_padding_check(self.padding[1], neuron_s)
 
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
-
-        out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
+        out_h = (in_h - self.kernel_size[0]) // self.stride[0] + 1
 
         super().__init__(
             neuron_s,
@@ -1246,7 +1258,7 @@ def build(
         self,
         network: DynSysGroup,
         valid_interval: int,
-        input_valid: int,
+        ts_first_valid_inp: int,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
@@ -1261,10 +1273,11 @@ def build(
         cin = in_ch
         _, kw = self.kernel_size
 
-        ts_1st_valid = input_valid + (kw - 1) * valid_interval
-        self.ts_1st_valid = ts_1st_valid
-        tick_wait_end = (
-            1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        self.ts_1st_valid_out = ts_first_valid_inp + (kw - 1) * valid_interval
+        twe = (
+            1
+            + self.ts_1st_valid_out
+            + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
         )
 
         if cin * in_h * kw * valid_interval > 18432:
@@ -1279,7 +1292,7 @@ def build(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
-            tick_wait_end=tick_wait_end,
+            tick_wait_end=twe,
             pool_max=True,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
@@ -1290,7 +1303,7 @@ def build(
                 (cin, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=tick_wait_end,
+                tick_wait_end=twe,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1298,7 +1311,7 @@ def build(
             # delay synapses
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
-                n_delays[i],
+                neuron,
                 weights=_delay_mapping(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
@@ -1313,7 +1326,7 @@ def build(
                     self.shape_out[1],
                     self.kernel_size[0],
                     self.stride,
-                    self.padding,
+                    (0, 0),
                 ),
                 name=f"s{i}_{self.name}",
             )
@@ -1347,11 +1360,9 @@ def __init__(
 
         self.stride = _stride
         self.padding = _pair(padding)
-        # self._w_padding_check(self.padding[1], neuron_s)
 
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
-
         out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
 
         super().__init__(
@@ -1369,7 +1380,7 @@ def build(
         self,
         network: DynSysGroup,
         valid_interval: int,
-        input_valid: int,
+        ts_first_valid_inp: int,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
@@ -1379,20 +1390,15 @@ def build(
         #     )
         #     self.module_intf.operands[0].shape_change((in_ch, in_h))
         self.valid_interval = valid_interval
-
         in_ch, in_h = self.module_intf.operands[0].shape_out
         cin = in_ch
         kh, kw = self.kernel_size
-        if cin * in_h * kw * valid_interval > 18432:
-            raise ResourceError(
-                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
-            )
+        out_h = self.shape_out[1]
 
-        ts_1st_valid = input_valid + (kw - 1 - self.padding[0]) * valid_interval
-        self.ts_1st_valid = ts_1st_valid
-        tick_wait_end = (
-            1 + ts_1st_valid + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        self.ts_1st_valid_out = (
+            ts_first_valid_inp + (kw - 1 - self.padding[0]) * valid_interval
         )
+        twe = 1 + self.ts_1st_valid_out + (out_h - 1) * valid_interval * self.stride[1]
 
         E = math.ceil(math.log2(cin * in_h * kw / 144))
         E = 0 if E < 0 else E
@@ -1406,16 +1412,16 @@ def build(
         bit_trunc = 8 + (kh * kw).bit_length() - 1
 
         n_delays = NodeList()
-        n_copies = NodeList()
+        n_neg_padding = NodeList()
         s_delays = NodeList()
-        s_kernel = NodeList()
+        s_neg_padding = NodeList()
 
         pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             bit_trunc=bit_trunc,
             tick_wait_start=self.tick_wait_start + 1,
-            tick_wait_end=tick_wait_end,
+            tick_wait_end=twe,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
@@ -1424,7 +1430,7 @@ def build(
                 (cin, in_h),
                 delay=valid_interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=tick_wait_end,
+                tick_wait_end=twe,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1432,7 +1438,7 @@ def build(
             # delay synapses
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
-                n_delays[i],
+                neuron,
                 weights=_delay_mapping(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
@@ -1442,57 +1448,47 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping(
-                    cin,
-                    in_h,
-                    self.shape_out[1],
-                    self.kernel_size[0],
-                    self.stride,
-                    self.padding,
+                    cin, in_h, out_h, kh, self.stride, self.padding
                 ),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
             s_delays.append(syn2)
-        if input_valid > 0:
-            for i in range(self.padding[0]):
+
+        # Extra negative padding layer
+        if ts_first_valid_inp > 0:
+            for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, in_h),
-                    delay=valid_interval * (kw - 1 - i) + 1,
+                    delay=valid_interval * (kw - 1 - p) + 1,
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=input_valid,
+                    tick_wait_end=ts_first_valid_inp,
                     keep_shape=self.keep_shape,
-                    name=f"n{i}_copy_{self.name}",
+                    name=f"n{p}_pad_{self.name}",
                 )
-
-                n_copies.append(neuron)
+                n_neg_padding.append(neuron)
                 # delay synapses
                 syn1 = FullConnSyn(
                     self.module_intf.operands[0],
-                    n_copies[i],
+                    neuron,
                     weights=_delay_mapping(in_h, cin),
                     conn_type=ConnType.All2All,
-                    name=f"s{i}_copy_{self.name}",
+                    name=f"s{p}_pad_{self.name}",
                 )
                 s_delays.append(syn1)
 
-                syn2 = FullConnSyn(  # cin, ih -> cout * oh
-                    n_copies[i],
+                syn2 = FullConnSyn(
+                    neuron,
                     pool2d,
-                    weights=-(
-                        _poo2d_semifolded_mapping(
-                            cin,
-                            in_h,
-                            self.shape_out[1],
-                            self.kernel_size[0],
-                            self.stride,
-                            self.padding,
-                        )
+                    weights=-_poo2d_semifolded_mapping(
+                        cin, in_h, out_h, kh, self.stride, self.padding
                     ),
                     conn_type=ConnType.All2All,
                     name=f"neg_s{i}_{self.name}",
                 )
-                s_kernel.append(syn2)
-        generated = [pool2d, *n_delays, *s_delays]
+                s_neg_padding.append(syn2)
+
+        generated = [pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
         self._rebuild_out_intf(network, pool2d, *generated, **build_options)
 
         return generated
@@ -1623,6 +1619,7 @@ def _poo2d_semifolded_mapping(
 
     for j in range(oh):
         m_block[j * stride[1] : j * stride[1] + kh, j] = 1
+
     if padding[0] > 0:
         m_block = np.delete(
             m_block,
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 847aa878..fd3752d9 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -194,8 +194,10 @@ def _conv2d_semifolded_unroll(
     ih = in_shape[1] + 2 * padding[0]
     _, oh = out_shape
     w_np = np.zeros((cin * in_shape[1], cout * oh), dtype=kernel.dtype)
+
     for i in range(cout):
         for j in range(cin):
+            # Must recreate `w_block` every time because some rows will be deleted.
             w_block = np.zeros((ih, oh), dtype=kernel.dtype)
             for k in range(oh):
                 w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[i, j, :]
@@ -209,6 +211,7 @@ def _conv2d_semifolded_unroll(
             w_np[j * in_shape[1] : (j + 1) * in_shape[1], i * oh : (i + 1) * oh] = (
                 w_block
             )
+
     return w_np
 
 
@@ -227,7 +230,7 @@ def _conv1d_faster(
     """Faster 1d convolution."""
     cout, _, kl = kernel.shape  # (O, I, L)
 
-    x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])), mode="constant")
+    x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])))
 
     # kernel: (cout, cin, kl) -> (cout, cin*kl)
     col_kernel = kernel.reshape(cout, -1)
@@ -257,7 +260,6 @@ def _conv2d_faster(
     x_padded = np.pad(
         x_chw,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-        mode="constant",
     )
 
     # kernel: (cout, cin, kh, kw) -> (cout, cin*kh*kw)
@@ -338,9 +340,7 @@ def _convtranspose1d_unroll(
     )
 
     # output_padding
-    w_unrolled = np.pad(
-        w_unrolled, ((0, 0), (0, 0), (0, 0), (0, output_padding[0])), mode="constant"
-    )
+    w_unrolled = np.pad(w_unrolled, ((0, 0), (0, 0), (0, 0), (0, output_padding[0])))
     w_unrolled = w_unrolled.reshape(cin * in_shape[0], cout * out_shape[0])
 
     return w_unrolled
@@ -432,7 +432,6 @@ def _convtranspose2d_unroll(
             (0, output_padding[0]),
             (0, output_padding[1]),
         ),
-        mode="constant",
     )
     w_unrolled = w_unrolled.reshape(
         cin * in_shape[0] * in_shape[1], cout * out_shape[0] * out_shape[1]
@@ -468,7 +467,7 @@ def _convtranspose1d_faster(
 
     # inverse padding
     # x_transpose : (cin, (xl-1)*(stride-1)+2*(kl-1))
-    x_transpose = np.pad(x_transpose, ((0, 0), (kl - 1, kl - 1)), mode="constant")
+    x_transpose = np.pad(x_transpose, ((0, 0), (kl - 1, kl - 1)))
 
     # convolution kernel rotated 180 degrees
     kernel_flip = np.flip(kernel, axis=2)
@@ -489,7 +488,7 @@ def _convtranspose1d_faster(
     out = out[:, padding[0] : (-1 * padding[0])] if padding[0] > 0 else out
 
     # output_padding
-    out = np.pad(out, ((0, 0), (0, output_padding[0])), mode="constant")
+    out = np.pad(out, ((0, 0), (0, output_padding[0])))
 
     return out.astype(VOLTAGE_DTYPE)
 
@@ -524,9 +523,7 @@ def _convtranspose2d_faster(
     x_transpose = np.zeros((xc_t, xh_t, xw_t), dtype=x_chw.dtype)
     x_transpose[::1, :: stride[0], :: stride[1]] = x_chw
     # padding 0 for transpose not for parameter padding, get new input array x_transpose
-    x_transpose = np.pad(
-        x_transpose, ((0, 0), (kh - 1, kh - 1), (kw - 1, kw - 1)), mode="constant"
-    )
+    x_transpose = np.pad(x_transpose, ((0, 0), (kh - 1, kh - 1), (kw - 1, kw - 1)))
 
     # kernel: (cout, cin, kh, kw) -> (cout, cin*kh*kw)
     kernel_flip = np.flip(kernel, axis=(2, 3))  # convolution kernel rotated 180 degrees
@@ -551,9 +548,7 @@ def _convtranspose2d_faster(
         padding[1] : (-1 * padding[1]) if padding[1] > 0 else None,
     ]
     # output_padding
-    out = np.pad(
-        out, ((0, 0), (0, output_padding[0]), (0, output_padding[1])), mode="constant"
-    )
+    out = np.pad(out, ((0, 0), (0, output_padding[0]), (0, output_padding[1])))
 
     return out
 
@@ -702,7 +697,7 @@ def _func_pool1d(
     assert (xl + padding[0] * 2 - kl) // stride[0] + 1 == ol
 
     out = np.zeros((cout, ol), dtype=np.int32)
-    x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])), mode="constant")
+    x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])))
 
     for c in range(cout):
         for i in range(ol):
@@ -740,7 +735,6 @@ def _func_pool2d(
     x_padded = np.pad(
         x_chw,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-        mode="constant",
     )
 
     for c in range(cout):
diff --git a/paibox/network.py b/paibox/network.py
index 3d2a9eae..b657105b 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -93,16 +93,17 @@ def build_fmodule(
         # If the input data is input continuously on the W-axis, the initial
         # valid interval for the first semi-folded component is 1.
         semi_valid_interval = 1
-        ts_1st_valid = 0
+        ts_1st_valid_out = 0
+
         for module in modules.values():
             if isinstance(
                 module, (Conv2dSemiFolded, MaxPool2dSemiFolded, AvgPool2dSemiFolded)
             ):
                 generated[module] = module.build(
-                    network, semi_valid_interval, ts_1st_valid, **build_options
+                    network, semi_valid_interval, ts_1st_valid_out, **build_options
                 )
                 semi_valid_interval *= module.stride[1]
-                ts_1st_valid = module.ts_1st_valid
+                ts_1st_valid_out = module.ts_1st_valid_out
             elif isinstance(module, LinearSemiFolded):
                 generated[module] = module.build(
                     network, semi_valid_interval, **build_options
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index e7c4b1fa..15df0663 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -986,7 +986,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         for i in range(n_conv):
             if i == 0:
                 ts_1st_valid[i] = (
-                    kshape_oihw[0][-1] - padding[0]
+                    kshape_oihw[0][-1] - paddings[0][0]
                 ) * semi_valid_interval[0]
             else:
                 ts_1st_valid[i] = (
@@ -1056,7 +1056,8 @@ def test_Conv2dSemiFolded_FC_ChainNet(
 
     @pytest.mark.parametrize(
         "ishape_chw, n_pool, kshape_hw, stride, padding, out_features, pool_type",
-        [  # n_pool = 1
+        [
+            # n_pool = 1
             ((3, 16, 16), 1, [2], [2], [1], (10,), "avg"),
             # n_pool = 2
             ((3, 24, 24), 2, [2, 2], [1, 1], [0, 0], (2, 2), "avg"),
diff --git a/tests/components/utils.py b/tests/components/utils.py
index cc7ac71a..e0ed80d4 100644
--- a/tests/components/utils.py
+++ b/tests/components/utils.py
@@ -31,7 +31,7 @@ def conv1d_golden(
 
     out = np.zeros((cout,) + out_shape, dtype=np.int64)
 
-    x_padded = np.pad(x, ((0, 0), (padding[0], padding[0])), mode="constant")
+    x_padded = np.pad(x, ((0, 0), (padding[0], padding[0])))
     conv_result = np.zeros((ol,), dtype=np.int64)
 
     for o in range(cout):
@@ -70,7 +70,6 @@ def conv2d_golden(
     x_padded = np.pad(
         x,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-        mode="constant",
     )
     conv_result = np.zeros((oh, ow), dtype=np.int64)
 
@@ -110,11 +109,7 @@ def maxpool1d_golden(
     cout = xcin
 
     out = np.zeros((cout, ol), dtype=x.dtype)
-    x_padded = np.pad(
-        _x,
-        ((0, 0), (padding[0], padding[0])),
-        mode="constant",
-    )
+    x_padded = np.pad(_x, ((0, 0), (padding[0], padding[0])))
 
     for c in range(cout):
         for i in range(ol):
@@ -168,11 +163,7 @@ def maxpool2d_golden(
     else:
         out = np.zeros((cout, oh, ow), dtype=SPIKE_DTYPE)
 
-    x_padded = np.pad(
-        _x,
-        ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-        mode="constant",
-    )
+    x_padded = np.pad(_x, ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])))
 
     for c in range(cout):
         for i in range(oh):
@@ -208,11 +199,7 @@ def avgpool1d_golden(
     cout = xcin
 
     out = np.zeros((cout, ol), dtype=WEIGHT_DTYPE)
-    x_padded = np.pad(
-        _x,
-        ((0, 0), (padding[0], padding[0])),
-        mode="constant",
-    )
+    x_padded = np.pad(_x, ((0, 0), (padding[0], padding[0])))
 
     for c in range(cout):
         for i in range(ol):
@@ -265,11 +252,7 @@ def avgpool2d_golden(
 
     # Treat the result as voltage since it will be turncated or compared later.
     out = np.zeros((cout, oh, ow), dtype=VOLTAGE_DTYPE)
-    x_padded = np.pad(
-        _x,
-        ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-        mode="constant",
-    )
+    x_padded = np.pad(_x, ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])))
 
     for c in range(cout):
         for i in range(oh):
@@ -285,4 +268,5 @@ def avgpool2d_golden(
     if threshold:
         return out >= threshold
     else:
+        # Use the bit truncation method to simulate the behavior of the hardware.
         return out >> ((kh * kw).bit_length() - 1)

From 3e4615117feae0be72154cc0a07f18dea4273b2a Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 4 Oct 2024 19:17:50 +0800
Subject: [PATCH 083/187] =?UTF-8?q?=E2=9C=A8=20add=20check=20for=20semi-fo?=
 =?UTF-8?q?lded=20ops=20during=20the=20build=20phase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py        |  3 +++
 paibox/components/_modules.py   | 22 +++++++++++++++++++++-
 paibox/components/functional.py | 21 ++++++---------------
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 640052cf..5912838b 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -113,6 +113,9 @@ def build(self, *networks: DynSysGroup, **build_options) -> None:
 
     def _pre_build(self, **build_options) -> None:
         """Preprocessing before obtaining the topology."""
+        # Check the hardware resource limits of operators in the network during the build phase.
+        build_options.setdefault("check_before_compile", True)
+
         # Build functional modules in the subnets
         for subnet in self._raw_networks:
             DynSysGroup.build_fmodule(subnet, **build_options)
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index cbf392b4..daacfaba 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,9 +1,11 @@
+import math
 from typing import Literal, Optional, Protocol, Union
 
 import numpy as np
-from paicorelib import TM
+from paicorelib import TM, HwConfig
 
 from paibox.base import NeuDyn, NodeList
+from paibox.exceptions import ResourceError
 from paibox.network import DynSysGroup
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -173,6 +175,24 @@ class _SemiFoldedModule(FunctionalModule, _HasSemiFoldedIntf):
     ts_1st_valid_out: int = 0
     """The timestamp of the first valid output data"""
 
+    def _input_buffer_len_check(
+        self, in_channels: int, in_h: int, kw: int, valid_interval: int
+    ) -> None:
+        """Check the limit of the semi-folded operators on the input buffer length of the core during the build phase.
+
+        NOTE: If the condition is not met, an exception will be raised in the subsequent compilation phase.
+        """
+        E = math.ceil(
+            math.log2(
+                math.ceil(in_channels * in_h * kw / HwConfig.N_FANIN_PER_DENDRITE_ANN)
+            )
+        )
+        # Too large only when kw * valid_interval exceeds N_TIMESLOT_MAX / 2**E.
+        if kw * valid_interval > HwConfig.N_TIMESLOT_MAX / (2**E):
+            raise ResourceError(
+                f"the input size of {self.name} is too large. Please adjust the input size or the number of channels."
+            )
+
 
 class _LinearBase(FunctionalModule):
     def __init__(
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 6726f28c..00bdcf66 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1,4 +1,3 @@
-import math
 import sys
 from collections.abc import Sequence
 from functools import partial
@@ -1008,10 +1007,8 @@ def build(
             + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
         )
 
-        if cin * in_h * kw * valid_interval > 18432:
-            raise ResourceError(
-                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
-            )
+        if build_options.get("check_before_compile"):
+            self._input_buffer_len_check(cin, in_h, kw, valid_interval)
 
         n_delays = NodeList()
         n_neg_padding = NodeList()
@@ -1280,10 +1277,8 @@ def build(
             + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
         )
 
-        if cin * in_h * kw * valid_interval > 18432:
-            raise ResourceError(
-                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
-            )
+        if build_options.get("check_before_compile"):
+            self._input_buffer_len_check(cin, in_h, kw, valid_interval)
 
         n_delays = NodeList()
         s_delays = NodeList()
@@ -1400,12 +1395,8 @@ def build(
         )
         twe = 1 + self.ts_1st_valid_out + (out_h - 1) * valid_interval * self.stride[1]
 
-        E = math.ceil(math.log2(cin * in_h * kw / 144))
-        E = 0 if E < 0 else E
-        if kw * valid_interval > 256 / (2**E):
-            raise ResourceError(
-                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
-            )
+        if build_options.get("check_before_compile"):
+            self._input_buffer_len_check(cin, in_h, kw, valid_interval)
 
         # NOTE: Division is achieved with the help of truncation operation.
         # It can only be approximated to a power of an integer of 2.

From 3a349af9446f80e23533b1b772c00f6fdde897ba Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 7 Oct 2024 17:14:51 +0000
Subject: [PATCH 084/187] :arrow_up: auto update by pre-commit hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0)
---
 .pre-commit-config.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0e18fa2d..6794b316 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,20 +10,20 @@ repos:
     rev: 5.13.2
     hooks:
       - id: isort
-        stages: [commit]
+        stages: [pre-commit]
 
   - repo: https://github.com/psf/black
     rev: 24.8.0
     hooks:
       - id: black
-        stages: [commit]
+        stages: [pre-commit]
 
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         types_or: [markdown, yaml, json]
-        stages: [commit]
+        stages: [pre-commit]
 
   - repo: https://github.com/dannysepler/rm_unneeded_f_str
     rev: v0.2.0
@@ -31,7 +31,7 @@ repos:
       - id: rm-unneeded-f-str
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer

From cab321f3a53d105f35132feb85c3fcef4d416671 Mon Sep 17 00:00:00 2001
From: hongtux <hongtux@pku.edu.cn>
Date: Wed, 9 Oct 2024 19:16:07 +0800
Subject: [PATCH 085/187] fm-bug-fixed

---
 paibox/components/synapses/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 4a590c41..e7ef3aca 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -79,7 +79,7 @@ def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
                 )
         else:
             # Retrieve 0 to the dest neurons if it is not working
-            synin = np.zeros_like(self.source.output)
+            synin = np.zeros_like(self.source.delay_registers[0] if x is None else np.atleast_1d(x))
 
         self._synout = self.comm(synin).ravel()
 

From 285abad9ed350911490e9b2536e3059015f708f3 Mon Sep 17 00:00:00 2001
From: hongtux <hongtux@pku.edu.cn>
Date: Thu, 10 Oct 2024 10:27:21 +0800
Subject: [PATCH 086/187] fix fmodule bugs

---
 paibox/components/synapses/base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index e7ef3aca..4605810a 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -79,7 +79,10 @@ def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
                 )
         else:
             # Retrieve 0 to the dest neurons if it is not working
-            synin = np.zeros_like(self.source.delay_registers[0] if x is None else np.atleast_1d(x))
+            if isinstance(self.source, InputProj):
+                synin = np.zeros.like(self.source.output if x is None else np.atleast_1d(x))
+            else:
+                synin = np.zeros_like(self.source.delay_registers[0] if x is None else np.atleast_1d(x))
 
         self._synout = self.comm(synin).ravel()
 

From a81b5a7fc0f931a27090592f173d1779ae2e2329 Mon Sep 17 00:00:00 2001
From: birdswimming <birdswimming3.14@gmail.com>
Date: Thu, 10 Oct 2024 11:21:05 +0800
Subject: [PATCH 087/187] refine constraint in routing

---
 paibox/backend/routing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index dc0a1507..ac06ff23 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -611,7 +611,7 @@ def place_routing_group(self, routing_group: RoutingGroup) -> None:
         n_core_req = routing_group.n_core_required
         n_core_cost = 1 << (n_core_req - 1).bit_length()  # n_core_req <= 2^X
 
-        if n_core_cost > HwConfig.N_CORE_OFFLINE:
+        if n_core_req > HwConfig.N_CORE_OFFLINE:
             raise ResourceError(
                 "the number of cores required by the routing group exceeds the hardware limit, "
                 f"{n_core_cost} > {HwConfig.N_CORE_OFFLINE}."

From 20ca9a3f3d8819b4552cd928a7895e81cb36be6a Mon Sep 17 00:00:00 2001
From: Joustrd <17739386485@163.com>
Date: Thu, 10 Oct 2024 15:51:20 +0800
Subject: [PATCH 088/187] fix fmodule bugs

---
 paibox/components/synapses/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 4605810a..55c17221 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -80,7 +80,7 @@ def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
         else:
             # Retrieve 0 to the dest neurons if it is not working
             if isinstance(self.source, InputProj):
-                synin = np.zeros.like(self.source.output if x is None else np.atleast_1d(x))
+                synin = np.zeros_like(self.source.output if x is None else np.atleast_1d(x))
             else:
                 synin = np.zeros_like(self.source.delay_registers[0] if x is None else np.atleast_1d(x))
 

From f3a7bda6d4c5bc8a8562cc67462d3d38df40bfb1 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sun, 22 Sep 2024 22:07:45 +0800
Subject: [PATCH 089/187] =?UTF-8?q?=E2=9C=A8=20add=20prototype=20functions?=
 =?UTF-8?q?=20for=20wram=20mapping=20&=20test=20cases?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_placement.py | 781 ++++++++++++++++++++++++--------
 1 file changed, 583 insertions(+), 198 deletions(-)

diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 3d67dd4f..679c2089 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -1,20 +1,44 @@
+import math
 import numpy as np
 import pytest
-from paicorelib import LCN_EX, HwConfig
+import sys
+from contextlib import nullcontext
+from functools import partial
+from paicorelib import Coord, LCN_EX, HwConfig, NeuronAttrs
+from paicorelib import ReplicationId as RId
 from paicorelib import WeightWidth as WW
+from paicorelib.framelib import OfflineFrameGen
+from typing import Literal, Optional
 
 import paibox as pb
 from paibox.backend.placement import CorePlacement
-from paibox.backend.types import WRAM_PACKED_DTYPE, NeuSegment, WRAMUnpackedType
+from paibox.backend.types import (
+    WRAM_PACKED_DTYPE,
+    WRAM_UNPACKED_DTYPE,
+    NeuSegment,
+    WRAMPackedType,
+    WRAMUnpackedType,
+)
 from paibox.exceptions import ResourceError
 from paibox.types import WEIGHT_DTYPE, WeightType
 
+from .test_conf_template import _gen_random_neuron_dest_info
+
 
-def packbits_ref(bits: np.ndarray, count: int) -> int:
-    """Pack unsigned bits into a signed integer.
+def _packbits_ref(bits: np.ndarray, count: Optional[int] = None) -> int:
+    """Pack unsigned bits (from LSB to MSB) into a signed integer.
 
-    This is a test of the prototype of the original function.
+    Args:
+        - bits: an array of bits from LSB to MSB(sign bit).
+        - count: `bits` is an N-bit signed integer. If not provided, it is  \
+            assumed to be the same as `bits.size`.
     """
+    if count is None:
+        count = bits.size
+
+    if count == 1:
+        return bits[0]
+
     _bits = np.append(bits[: count - 1], bits[-1])
 
     result = sum(bit << i for i, bit in enumerate(_bits))
@@ -23,7 +47,19 @@ def packbits_ref(bits: np.ndarray, count: int) -> int:
     return result
 
 
-def test_get_raw_weight_ref(fixed_rng: np.random.Generator):
+packbits1 = partial(_packbits_ref, count=1)
+packbits2 = partial(_packbits_ref, count=2)
+packbits4 = partial(_packbits_ref, count=4)
+packbits8 = partial(_packbits_ref, count=8)
+
+
+def _nbit_limit(nbit: int) -> tuple[int, int]:
+    hi = 2 if nbit == 1 else 1 << (nbit - 1)
+    lo = 0 if nbit == 1 else -hi
+    return lo, hi
+
+
+def test_get_raw_weight(fixed_rng: np.random.Generator):
     w1 = fixed_rng.integers(-128, 128, size=(10, 20), dtype=WEIGHT_DTYPE)
     w2 = fixed_rng.integers(-128, 128, size=(10, 30), dtype=WEIGHT_DTYPE)
 
@@ -55,81 +91,15 @@ def test_get_raw_weight_ref(fixed_rng: np.random.Generator):
         w_of_neu_segs_of_cb.append(w_of_neu_segs)
 
 
-@pytest.mark.parametrize(
-    "input, n_col_groups, expected",
-    [
-        (
-            np.arange(1, 17, dtype=np.int8).reshape(8, 2),
-            2,
-            np.array(
-                [
-                    [1, 13, 2, 14],
-                    [3, 15, 4, 16],
-                    [5, 0, 6, 0],
-                    [7, 0, 8, 0],
-                    [9, 0, 10, 0],
-                    [11, 0, 12, 0],
-                ],
-                dtype=np.int8,
-            ),
-        ),
-        (
-            np.arange(1, 13, dtype=np.int8).reshape(6, 2),
-            3,
-            np.array([[1, 5, 9, 2, 6, 10], [3, 7, 11, 4, 8, 12]], dtype=np.int8),
-        ),
-        (
-            np.arange(1, 25, dtype=np.int8).reshape(8, 3),
-            3,
-            np.array(
-                [
-                    [1, 10, 19, 2, 11, 20, 3, 12, 21],
-                    [4, 13, 22, 5, 14, 23, 6, 15, 24],
-                    [7, 16, 0, 8, 17, 0, 9, 18, 0],
-                ],
-                dtype=np.int8,
-            ),
-        ),
-    ],
-)
-def test_weight_ram_mapping(input, n_col_groups, expected):
-    """Convert a weight matirx into a standard binary connectivity.
-
-    This is a test of the prototype of the original function.
-    """
-    cur_shape = input.shape
-    row, _ = expected.shape
-    o_matrix = np.zeros(expected.shape, dtype=np.int8)
-
-    for i in range(cur_shape[1]):
-        w_col = input[:, i]
-        col_group = 0
-
-        while (n_rest_axon := cur_shape[0] - row * col_group) > row:
-            o_matrix[:, n_col_groups * i + col_group] = w_col[
-                row * col_group : row * (col_group + 1)
-            ]
-            col_group += 1
-
-        o_matrix[:, n_col_groups * i + col_group] = np.pad(
-            w_col[row * col_group :],
-            pad_width=(0, row - n_rest_axon),
-            mode="constant",
-            constant_values=0,
-        )
-
-    assert np.array_equal(o_matrix, expected)
-
-
-def test_nfold_weight_ref():
+def test_nfold_weight():
+    """A prototype function of `_nfold_weight` to test the weight folding."""
     original_matrix = np.arange(1, 25, dtype=WEIGHT_DTYPE).reshape(8, 3)
     nfold = 3
 
-    if original_matrix.shape[0] % nfold > 0:
-        _padding = nfold - original_matrix.shape[0] % nfold
+    if (r := original_matrix.shape[0] % nfold) > 0:
         w_padding = np.append(
             original_matrix,
-            values=np.zeros((_padding, original_matrix.shape[1]), dtype=WEIGHT_DTYPE),
+            values=np.zeros((nfold - r, original_matrix.shape[1]), dtype=WEIGHT_DTYPE),
             axis=0,
         )
     else:
@@ -137,8 +107,9 @@ def test_nfold_weight_ref():
 
     split = np.vsplit(w_padding, nfold)
 
+    expected_row = w_padding.shape[0] // nfold
     result = np.zeros(
-        (w_padding.shape[0] // nfold, original_matrix.shape[1] * nfold),
+        (expected_row, original_matrix.shape[1] * nfold),
         dtype=WEIGHT_DTYPE,
     )
 
@@ -159,32 +130,45 @@ def test_nfold_weight_ref():
     )
 
 
-class TestWeightUnpack:
-    @pytest.mark.parametrize(
-        "wp",
-        [
-            WW.WEIGHT_WIDTH_8BIT,
-            WW.WEIGHT_WIDTH_4BIT,
-            WW.WEIGHT_WIDTH_2BIT,
-            WW.WEIGHT_WIDTH_1BIT,
-        ],
-    )
-    def test_signed_unpackbits(self, wp):
-        count = 1 << wp
-        actual_array = np.arange(-(1 << (count - 1)), (1 << (count - 1)), dtype=np.int8)
+N_BIT_PACKED_WEIGHT = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bits of packed weight
+if hasattr(CorePlacement, "WRAM_BASE_SHAPE"):
+    WRAM_BASE_SHAPE = CorePlacement.WRAM_BASE_SHAPE
+else:
+    WRAM_BASE_SHAPE = (HwConfig.ADDR_AXON_MAX + 1, HwConfig.ADDR_RAM_MAX + 1)
 
-        for actual_signed in actual_array:
-            unpacked = np.unpackbits(
-                np.uint8(actual_signed), axis=0, count=count, bitorder="little"
-            )
-            assert actual_signed == packbits_ref(unpacked, count)
 
-    def test_uint8_unpackbits_scalar(self):
-        import sys
+def _get_max_fanout(iw: int, dendr_comb_rate: int) -> int:
+    if iw == 1:
+        return HwConfig.N_DENDRITE_MAX_SNN >> dendr_comb_rate
+    else:
+        return FANOUT_IW8[dendr_comb_rate]
+
+
+class TestWeightUnpackAndPack:
+    def test_signed_unpackbits(self):
+        for wp in WW:
+            nbit = 1 << wp
+            _low, _high = _nbit_limit(nbit)
+
+            if nbit == 1:
+                assert (_low, _high) == (0, 2)
+            elif nbit == 2:
+                assert (_low, _high) == (-2, 2)
+            elif nbit == 4:
+                assert (_low, _high) == (-8, 8)
+            else:
+                assert (_low, _high) == (-128, 128)
 
-        # Little endian on x86_64
-        assert sys.byteorder == "little"
+            actual_array = np.arange(_low, _high, dtype=np.int8)
 
+            for actual_signed in actual_array:
+                unpacked = np.unpackbits(
+                    np.uint8(actual_signed), axis=0, count=nbit, bitorder="little"
+                )
+                assert actual_signed == _packbits_ref(unpacked, nbit)
+
+    @pytest.mark.skipif(sys.byteorder != "little", reason="not little-endian")
+    def test_uint8_unpackbits_scalar(self):
         x1 = np.int8(101)  # 01100101
         assert x1 == 0b01100101
         x2 = np.int8(-27)  # 11100101
@@ -198,27 +182,89 @@ def test_uint8_unpackbits_scalar(self):
         assert np.array_equal(y2, np.array([1, 0, 1, 0, 0, 1, 1, 1], dtype=np.uint8))
 
     @pytest.mark.parametrize(
-        "shape, wp, nfold, is_iw8",
+        "shape, wp, lcn_ex",
+        [
+            ((120, 800), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X),
+            ((16, 16), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_2X),
+            ((80, 48), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_16X),
+            ((100, 510), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_1X),
+            ((99, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_2X),
+            ((100, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_8X),
+        ],
+    )
+    def test_unpacked_weight_pack(
+        self, shape, wp, lcn_ex, fixed_rng: np.random.Generator
+    ):
+        assert shape[1] <= _get_max_fanout(8, wp + lcn_ex)
+
+        nbit = 1 << wp
+        nfold = 1 << lcn_ex
+        _low, _high = _nbit_limit(nbit)
+        # Generate the unpacked weight, folded
+        test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
+        w_packed_u64 = self._weight_pack(test_weight, nbit, nfold)
+
+        assert w_packed_u64.shape[0] == WRAM_BASE_SHAPE[1]
+
+    @staticmethod
+    def _weight_pack(w: WeightType, nbit: int, nfold: int) -> WRAMPackedType:
+        """This prototype function is used to pack the unpacked uint8 weight of size `WRAM_BASE_SHAPE` into \
+            a packed uint64 weight of size (WRAM_BASE_SHAPE[1], WRAM_BASE_SHAPE[0]//64)."""
+        wram_base_shape = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
+
+        # -> 1152*512 uint8
+        wram_unpacked = TestWeightRamMapping._weight_ram_mapping(w, nbit, nfold, 8)
+        wram_base_shape[:, : wram_unpacked.shape[1]] = wram_unpacked
+
+        # -> 512*1152 -> (512*18)*64
+        w_unpacked_aligned = wram_base_shape.T.reshape((-1, N_BIT_PACKED_WEIGHT))
+
+        # -> (512*18)*8 uint8
+        w_packed_u8 = np.packbits(w_unpacked_aligned, axis=1, bitorder="little")
+        assert w_packed_u8.shape[1] == 8
+
+        _n_u64 = WRAM_BASE_SHAPE[0] // N_BIT_PACKED_WEIGHT
+        # -> (512*18)*1 uint64 -> 512*18 uint64
+        w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape((-1, _n_u64))
+
+        return w_packed_u64
+
+
+from paibox.backend.placement import FANOUT_IW8
+
+NEURON_PARAMS_BIT_LENGTH = 214
+N_NEURON_PARAM_IN_COL = HwConfig.N_FANIN_PER_DENDRITE_MAX // NEURON_PARAMS_BIT_LENGTH
+
+
+class TestWeightRamMapping:
+
+    @pytest.mark.parametrize(
+        "shape, wp, lcn_ex",
         [
-            ((8, 8), WW.WEIGHT_WIDTH_8BIT, 2, False),
-            ((32, 32), WW.WEIGHT_WIDTH_8BIT, 2, False),
-            ((16, 16), WW.WEIGHT_WIDTH_4BIT, 4, False),
-            ((30, 24), WW.WEIGHT_WIDTH_4BIT, 4, False),
-            ((32, 24), WW.WEIGHT_WIDTH_2BIT, 3, False),
-            ((32, 24), WW.WEIGHT_WIDTH_1BIT, 3, False),
-            ((31, 23), WW.WEIGHT_WIDTH_8BIT, 5, False),
-            ((1200, 200), WW.WEIGHT_WIDTH_1BIT, 2, False),
-            ((800, 64), WW.WEIGHT_WIDTH_8BIT, 2, False),
-            ((8, 8), WW.WEIGHT_WIDTH_8BIT, 2, True),
-            ((32, 32), WW.WEIGHT_WIDTH_8BIT, 2, True),
-            ((16, 16), WW.WEIGHT_WIDTH_4BIT, 4, True),
-            ((200, 32), WW.WEIGHT_WIDTH_8BIT, 2, True),
-            ((30, 24), WW.WEIGHT_WIDTH_4BIT, 4, True),
-            ((32, 24), WW.WEIGHT_WIDTH_2BIT, 3, True),
+            ((1200, 200), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_2X),
+            ((1000 * 4, 24), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X),
+            ((1000 * 8, 50), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_8X),
+            ((1152 * 2, 120), WW.WEIGHT_WIDTH_2BIT, LCN_EX.LCN_2X),
+            ((16, 16), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_2X),
+            ((80, 5), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_16X),
+            ((800, 60), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_1X),
+            ((800, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_2X),
+            ((1100 * 8, 8), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_8X),
         ],
     )
-    def test_weight_ram_mapping(self, shape, wp, nfold, is_iw8):
+    def test_weight_ram_mapping_iw1(
+        self, shape, wp, lcn_ex, fixed_rng: np.random.Generator
+    ):
+        """A prototype function for testing weight RAM mapping for 1-bit input width.
+
+        NOTE: The shape of unpacked weight mapped in WRAM `wram_unpacked` is (1152(WRAM_BASE_SHAPE[0]), x),    \
+            where x <= 512 (WRAM_BASE_SHAPE[1]).
+        """
+        iw = 1
         nbit = 1 << wp
+        nfold = 1 << lcn_ex
+        # Check the shape[1] is legal
+        assert shape[1] <= _get_max_fanout(iw, wp + lcn_ex)
 
         if shape[0] % nfold > 0:
             expected_h = shape[0] // nfold + 1
@@ -228,39 +274,336 @@ def test_weight_ram_mapping(self, shape, wp, nfold, is_iw8):
         expected_shape = (expected_h, shape[1] * nfold)
 
         # Generate the original weight with shape
-        _low = 0 if nbit == 1 else -(1 << (nbit - 1))
-        _high = 1 << (nbit - 1)
-        test_weight = np.random.randint(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
+        _low, _high = _nbit_limit(nbit)
+        test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
 
         # 1. Fold, return the folded weight after padding.
-        w_folded = self._nfold_weight_ref(test_weight, expected_shape[0], nfold)
+        w_folded = self._fold_raw_weight_single(test_weight, expected_shape[0], nfold)
 
-        # 2. Unpack, get the weight ram.
-        # The real interval is HwConfig.N_FANIN_PER_DENDRITE_ANN
-        _fake_interval = w_folded.shape[0] * 2
-        w_unpacked = self._weight_ram_mapping_ref(
-            w_folded, nbit, is_iw8, _fake_interval
-        )
-        w_unpacked.setflags(write=False)
+        # 2. Map to the WRAM.
+        wram_unpacked = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
+        wram_weight = self._weight_ram_mapping(w_folded, nbit, nfold, iw)
+        wram_unpacked[:, : wram_weight.shape[1]] = wram_weight
 
         # 3. Check
-        self._check(
-            test_weight, w_folded, w_unpacked, nbit, nfold, is_iw8, _fake_interval
+        self._wram_mapping_check_iw1(test_weight, w_folded, wram_unpacked, nbit, nfold)
+
+    @pytest.mark.parametrize(
+        "shape, wp, lcn_ex",
+        [
+            # E*W < 8
+            ((240, 1200), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_2X),
+            ((500, 800), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X),
+            ((200, 800), WW.WEIGHT_WIDTH_2BIT, LCN_EX.LCN_2X),
+            ((144, 876), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_1X),
+            # E*W >= 8
+            ((30, 30), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_8X),
+            ((2200, 100), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_16X),
+            ((30, 24), WW.WEIGHT_WIDTH_2BIT, LCN_EX.LCN_4X),
+            ((100, 15), WW.WEIGHT_WIDTH_2BIT, LCN_EX.LCN_8X),
+            ((30, 24), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_2X),
+            ((550, 40), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_4X),
+            ((1001, 100), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_8X),
+            ((30, 24), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_1X),
+            ((200, 100), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_2X),
+            ((480, 100), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_4X),
+            ((4200, 8), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_32X),
+        ],
+    )
+    def test_weight_ram_mapping_iw8(
+        self, shape, wp, lcn_ex, fixed_rng: np.random.Generator
+    ):
+        """A prototype function for testing weight RAM mapping for 8-bit input width.
+
+        NOTE: The shape of unpacked weight mapped in WRAM `wram_unpacked` is (1152(WRAM_BASE_SHAPE[0]), x),    \
+            where x <= 512 (WRAM_BASE_SHAPE[1]).
+        """
+        iw = 8
+        nbit = 1 << wp
+        nfold = 1 << lcn_ex
+        # Check the shape[1] is legal
+        assert shape[1] <= _get_max_fanout(iw, wp + lcn_ex)
+
+        if shape[0] % nfold > 0:
+            expected_h = shape[0] // nfold + 1
+        else:
+            expected_h = shape[0] // nfold
+
+        expected_shape = (expected_h, shape[1] * nfold)
+
+        # Generate the original weight with shape
+        _low, _high = _nbit_limit(nbit)
+        test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
+
+        # 1. Fold, return the folded weight after padding.
+        w_folded = self._fold_raw_weight_single(test_weight, expected_shape[0], nfold)
+
+        # 2. Map to the WRAM.
+        wram_unpacked = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
+        wram_weight = self._weight_ram_mapping(w_folded, nbit, nfold, iw)
+        wram_unpacked[:, : wram_weight.shape[1]] = wram_weight
+
+        # NOTE: While mapping extra neuron parameters to the WRAM occurs
+        # during the configuration frame export phase, it is tested here.
+        if (n_extra_neurons := shape[1] - WRAM_BASE_SHAPE[1]) > 0:
+            wram_neurons = self._gen_wram_for_neurons(n_extra_neurons, wp, lcn_ex)
+
+            assert wram_weight.shape[1] + wram_neurons.shape[1] <= WRAM_BASE_SHAPE[1]
+            wram_unpacked[:, -wram_neurons.shape[1] :] = wram_neurons
+
+        # TODO how to check
+
+    @staticmethod
+    def _weight_ram_mapping(
+        folded_weights: WeightType, n_bit: int, n_fold: int, iw: Literal[1, 8]
+    ) -> WRAMUnpackedType:
+        if iw == 1:
+            # The length of slot for each bit of input data
+            bit_slot_length = HwConfig.N_FANIN_PER_DENDRITE_SNN
+        else:
+            # N_FANIN_PER_DENDRITE_SNN // iw
+            bit_slot_length = HwConfig.N_FANIN_PER_DENDRITE_ANN
+
+        folded_row, folded_col = folded_weights.shape
+        n_dendrite_comb = n_bit * n_fold
+        # oc * e / (8/w) = oc * d / 8
+        orig_col = folded_col // n_fold
+        result_col = math.ceil(orig_col * n_dendrite_comb / iw)
+        # Units are divided into small blocks of columns, fan-in extension
+        # (oc, lcn, nbit, 144/1152)
+        cew_block = np.zeros(
+            (orig_col, n_fold, n_bit, bit_slot_length), dtype=WRAM_UNPACKED_DTYPE
         )
+        # [N*M] -> [M*N*1]
+        folded_weights_3d = np.expand_dims(folded_weights.T, axis=2).view(
+            WRAM_UNPACKED_DTYPE
+        )
+        for c in range(orig_col):
+            for lcn in range(n_fold):
+                # Unpack the array [N*1] -> [N*8]
+                # [0, :]-> [folded_row, :]: A[0] -> A[folded_row-1]
+                # [:, 0]->[:,7]: LSB->MSB
+                unpacked = np.unpackbits(
+                    folded_weights_3d[c * n_fold + lcn, :, :],
+                    axis=1,
+                    count=n_bit,
+                    bitorder="little",
+                )
+
+                for bit in range(n_bit):
+                    cew_block[c, lcn, bit, :folded_row] = unpacked[:, bit].squeeze()
+
+        if n_dendrite_comb >= iw:  # For SNN mode, it must go into this case
+            # At least 1 fan-in is required to be combined in one column
+            result = cew_block.reshape((result_col, -1)).T
+        else:
+            # 2/4/8 original columns are combined in one column
+            n_col_comb_in_col = iw // n_dendrite_comb
+            cew_block = cew_block.reshape((orig_col, -1))
+
+            if (r := orig_col % n_col_comb_in_col) > 0:
+                cew_block = np.pad(cew_block, ((0, n_col_comb_in_col - r), (0, 0)))
+
+            # Now, length of padded columns is a multiple of 'n_col_comb_in_col'
+            assert cew_block.shape[0] % n_col_comb_in_col == 0
+            result = cew_block.reshape((cew_block.shape[0] // n_col_comb_in_col, -1)).T
+
+            # For n_dendrite_comb = 1, the #C columns of result <= FANOUT_IW8[0]/8
+            # For n_dendrite_comb = 2, #C <= FANOUT_IW8[1]/4
+            # For n_dendrite_comb = 4, #C <= FANOUT_IW8[2]/2
+            assert (
+                result.shape[1]
+                <= FANOUT_IW8[n_dendrite_comb.bit_length() - 1] // n_col_comb_in_col
+            )
+
+        assert np.max(result, axis=None) <= 1
+        assert np.min(result, axis=None) >= 0
+
+        return result
+
+    @pytest.mark.parametrize(
+        "shape, wp, lcn_ex, expectation",
+        [
+            # E*W=1
+            ((120, 1888), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_1X, nullcontext()),
+            (
+                (120, 1889),
+                WW.WEIGHT_WIDTH_1BIT,
+                LCN_EX.LCN_1X,
+                pytest.raises(AssertionError),
+            ),
+            # E*W=2
+            ((288, 1364), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_2X, nullcontext()),
+            (
+                (144, 1365),
+                WW.WEIGHT_WIDTH_2BIT,
+                LCN_EX.LCN_1X,
+                pytest.raises(AssertionError),
+            ),
+            # E*W=4
+            ((144 * 4, 876), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X, nullcontext()),
+            (
+                (144, 877),
+                WW.WEIGHT_WIDTH_4BIT,
+                LCN_EX.LCN_1X,
+                pytest.raises(AssertionError),
+            ),
+            ((120, 876), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_1X, nullcontext()),
+            (
+                (240, 877),
+                WW.WEIGHT_WIDTH_2BIT,
+                LCN_EX.LCN_2X,
+                pytest.raises(AssertionError),
+            ),
+        ],
+    )
+    def test_weight_ram_mapping_neurons_limit(
+        self, shape, wp, lcn_ex, expectation, fixed_rng: np.random.Generator
+    ):
+        """Test cases about neurons limit, only for 8-bit input width & #N of combined dendrites < 8."""
+        assert wp + lcn_ex <= 2
+        iw = 8
+        nbit = 1 << wp
+        nfold = 1 << lcn_ex
+
+        if shape[0] % nfold > 0:
+            expected_h = shape[0] // nfold + 1
+        else:
+            expected_h = shape[0] // nfold
+
+        expected_shape = (expected_h, shape[1] * nfold)
+
+        # Generate the original weight with shape
+        _low, _high = _nbit_limit(nbit)
+        test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
+
+        # 1. Fold, return the folded weight after padding.
+        w_folded = self._fold_raw_weight_single(test_weight, expected_shape[0], nfold)
+
+        # 2. Map to the WRAM.
+        with expectation:
+            w_mapped = self._weight_ram_mapping(w_folded, nbit, nfold, iw)
+
+    @staticmethod
+    def _weight_ram_mapping_iw8(
+        folded_weights: WeightType,
+        n_bit: int,
+        n_fold: int,
+        wbit_slot_length: int = HwConfig.N_FANIN_PER_DENDRITE_ANN,
+    ):
+        """A prototype function for weight ram mapping for 8-bit input width."""
+        row, col = folded_weights.shape
+        orig_col = col // n_fold
+        _n_block_in_row = 8  # iw = 8
+        dendrite_comb_rate = n_bit * n_fold
+        # oc * e / (8/w) = oc * d / 8
+        result_col = math.ceil(orig_col * dendrite_comb_rate / _n_block_in_row)
+        result = np.zeros(
+            (_n_block_in_row * wbit_slot_length, result_col), dtype=np.uint8
+        )
+        # Units are divided into small blocks of columns, fan-in extension
+        # Each block contains N-bits * 144 (slot length)
+        cew_block = np.zeros(
+            (orig_col, n_fold, n_bit, wbit_slot_length), dtype=np.uint8
+        )
+        # [N*M] -> [M*N*1]
+        folded_weights_3d = np.expand_dims(folded_weights.T, axis=2).view(np.uint8)
+
+        for c in range(orig_col):
+            for lcn in range(n_fold):
+                # For every m in M, unpack the array [N*1] -> [N*8]
+                # [0, :]-> [row, :]: A[0] -> A[row-1]
+                # [:, 0]->[:,7]: LSB->MSB
+                unpacked = np.unpackbits(
+                    folded_weights_3d[c * n_fold + lcn, :, :],
+                    axis=1,
+                    count=n_bit,
+                    bitorder="little",
+                )
+
+                for bit in range(n_bit):
+                    cew_block[c, lcn, bit, :row] = unpacked[:, bit].squeeze()
+
+        # if n_bit < 8:
+        #     if dendrite_comb_rate > _n_block_in_row:  # W<8, E*W>8
+        #         # How many fan-ins are combined in one column
+        #         n_lcn_comb_in_col = _n_block_in_row // n_bit  # <n_fold
+        #         # For all fan-ins on the original column, how many columns are needed to accommodate
+        #         n_col_lcn_accom, r = divmod(n_fold, n_lcn_comb_in_col)
+        #         assert r == 0
+        #         result3 = cew_block.reshape((result_col, -1)).T
+        #         cew_block = cew_block.reshape((orig_col, n_col_lcn_accom, -1))
+
+        #         for c, l in np.ndindex(cew_block.shape[:2]):
+        #             result[:, c * n_col_lcn_accom + l] = cew_block[c, l, :].ravel()
+
+        #         result2 = cew_block.reshape((result_col, -1)).T
+        #         assert np.array_equal(result, result2)
+        #         assert np.array_equal(result, result3)
+        #     else:  # W<8, E*W<=8
+        #         # How many original columns are combined in one column
+        #         n_col_comb_in_col = _n_block_in_row // dendrite_comb_rate  # 1 < x <= 8
+        #         cew_block = cew_block.reshape((orig_col, -1))
+
+        #         for c in range(cew_block.shape[0]):
+        #             col_idx, row_idx = divmod(c, n_col_comb_in_col)
+        #             result[
+        #                 row_idx
+        #                 * cew_block.shape[-1] : (row_idx + 1)
+        #                 * cew_block.shape[-1],
+        #                 col_idx,
+        #             ] = cew_block[c, :].ravel()
+
+        #         if (r := orig_col % n_col_comb_in_col) > 0:
+        #             cew_block = np.pad(cew_block, ((0, n_col_comb_in_col - r), (0, 0)))
+
+        #         # Now, length of padded columns is a multiple of 'n_col_comb_in_col'
+        #         assert cew_block.shape[0] % n_col_comb_in_col == 0
+        #         result2 = cew_block.reshape(
+        #             (cew_block.shape[0] // n_col_comb_in_col, -1)
+        #         ).T
+        #         assert np.array_equal(result, result2)
+        # else:  # W=8, EW>=8
+        #     result2 = cew_block.reshape((result_col, -1)).T
+        #     cew_block = cew_block.reshape((orig_col, n_fold, -1))
+        #     result = cew_block.reshape((orig_col * n_fold, -1)).T
+
+        #     assert np.array_equal(result, result2)
+
+        if dendrite_comb_rate >= _n_block_in_row:
+            # At least 1 fan-in is required to be combined in one column
+            result999 = cew_block.reshape((result_col, -1)).T
+        else:
+            # 2/4/8 original columns are combined in one column
+            n_col_comb_in_col = _n_block_in_row // dendrite_comb_rate
+            cew_block = cew_block.reshape((orig_col, -1))
+
+            if (r := orig_col % n_col_comb_in_col) > 0:
+                cew_block = np.pad(cew_block, ((0, n_col_comb_in_col - r), (0, 0)))
+
+            # Now, length of padded columns is a multiple of 'n_col_comb_in_col'
+            assert cew_block.shape[0] % n_col_comb_in_col == 0
+            result999 = cew_block.reshape(
+                (cew_block.shape[0] // n_col_comb_in_col, -1)
+            ).T
+
+        # assert np.max(result, axis=None) <= 1
+        # assert np.min(result, axis=None) >= 0
+
+        assert np.max(result999, axis=None) <= 1
+        assert np.min(result999, axis=None) >= 0
+
+        return result
 
     @staticmethod
-    def _nfold_weight_ref(raw_weight: WeightType, expected_row: int, nfold: int):
+    def _fold_raw_weight_single(raw_weight: WeightType, expected_row: int, nfold: int):
         raw_row, raw_col = raw_weight.shape
 
-        if raw_row % nfold > 0:
-            _padding = nfold - raw_row % nfold
+        if (r := raw_row % nfold) > 0:
+            _padding = nfold - r
             assert expected_row * nfold == raw_row + _padding
 
-            w_padding = np.append(
-                raw_weight,
-                values=np.zeros((_padding, raw_col), dtype=WEIGHT_DTYPE),
-                axis=0,
-            )
+            w_padding = np.pad(raw_weight, ((0, _padding), (0, 0)))
         else:
             w_padding = raw_weight
 
@@ -273,25 +616,12 @@ def _nfold_weight_ref(raw_weight: WeightType, expected_row: int, nfold: int):
 
         return w_folded
 
+    # at commit 67054d8
     @staticmethod
-    def _weight_ram_mapping_ref(
-        folded_weights: WeightType,
-        n_bit: int,
-        is_iw8: bool,
-        fake_interval: int,
-    ):
+    def _weight_ram_mapping_iw1_old(folded_weights: np.ndarray, n_bit: int):
+        """Old weight ram mapping for 1-bit input width."""
         row, col = folded_weights.shape
-        # if iw = 1, the row of result is the same as the row of folded_weights
-        if not is_iw8:
-            result_row = row
-        else:
-            result_row = 8 * fake_interval
-
-        result = np.zeros((result_row, col * n_bit), dtype=np.uint8)
-
-        if n_bit == 1:
-            result[:row, :col] = folded_weights
-            return result
+        result = np.zeros((row, col * n_bit), dtype=np.uint8)
 
         # [N*M] -> [M*N*1]
         folded_weights_3d = np.expand_dims(folded_weights.T, axis=2).astype(np.uint8)
@@ -302,13 +632,7 @@ def _weight_ram_mapping_ref(
                 folded_weights_3d[i], axis=1, count=n_bit, bitorder="little"
             )
 
-            if not is_iw8:
-                result[:row, n_bit * i : n_bit * (i + 1)] = unpacked
-            else:
-                for bit in range(n_bit):
-                    result[bit * fake_interval : bit * fake_interval + row, i] = (
-                        unpacked[:, bit]
-                    )
+            result[:, n_bit * i : n_bit * (i + 1)] = unpacked
 
         assert np.max(result, axis=None) <= 1
         assert np.min(result, axis=None) >= 0
@@ -316,63 +640,124 @@ def _weight_ram_mapping_ref(
         return result
 
     @staticmethod
-    def _check(
+    def _wram_mapping_check_iw1(
         test_data: WeightType,
         w_folded: WeightType,
         w_unpacked: WRAMUnpackedType,
         nbit: int,
         nfold: int,
-        is_iw8: bool,
-        fake_interval: int = 0,
     ) -> None:
         for i, j in np.ndindex(test_data.shape):
             n_in_col = w_folded.shape[0]
-            now_i = i % n_in_col
-            offset_j = i // n_in_col
+            offset_j, now_i = divmod(i, n_in_col)
             now_j = offset_j + j * nfold
 
-            if not is_iw8:
-                wij = w_unpacked[now_i, now_j * nbit : (now_j + 1) * nbit]
-            else:
-                # From LSB to MSB
-                bits = [
-                    w_unpacked[i * fake_interval + now_i, now_j] for i in range(nbit)
-                ]
-                wij = np.asarray(bits, dtype=np.uint8)
+            wij = w_unpacked[now_i, now_j * nbit : (now_j + 1) * nbit]
 
-            wij_packed = packbits_ref(wij, nbit)
+            wij_packed = _packbits_ref(wij, nbit)
             assert test_data[i, j] == wij_packed
 
-    def test_CorePlacement_weight_pack_shape(self):
-        # Mock unpacked weight
-        w_unpacked = np.zeros(CorePlacement.WRAM_BASE_SHAPE, dtype=np.uint8)
-        w_packed_u64 = CorePlacement._weight_pack(w_unpacked)
+    @staticmethod
+    def _wram_mapping_check_iw8(
+        test_data: WeightType,
+        w_folded: WeightType,
+        w_unpacked: WRAMUnpackedType,
+        nbit: int,
+        nfold: int,
+    ) -> None:
+        pass
 
-        assert w_packed_u64.shape == (
-            (HwConfig.ADDR_RAM_MAX + 1),
-            (HwConfig.ADDR_AXON_MAX + 1) // (WRAM_PACKED_DTYPE(1).nbytes * 8),
+    @pytest.mark.parametrize(
+        "shape, wp, lcn_ex",
+        [
+            # E*W < 8
+            ((240, 1200), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_2X),
+            ((500, 800), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X),
+            ((200, 800), WW.WEIGHT_WIDTH_2BIT, LCN_EX.LCN_2X),
+            ((200, 811), WW.WEIGHT_WIDTH_2BIT, LCN_EX.LCN_2X),
+            ((144, 876), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_1X),
+        ],
+    )
+    def test_weight_ram_mapping_for_neurons(self, shape, wp, lcn_ex):
+        """This test is for extra neurons parameters mapping on the WRAM for 8-bit input width."""
+        assert wp + lcn_ex <= 2
+
+        n_extra_neurons = shape[1] - WRAM_BASE_SHAPE[1]
+        wram_neurons = self._gen_wram_for_neurons(n_extra_neurons, wp, lcn_ex)
+
+    @staticmethod
+    def _gen_wram_for_neurons(n_extra_neurons: int, wp, lcn_ex):
+        """A prototype function for mapping extra neurons parameters on the WRAM for 8-bit input width.
+
+        NOTE: The shape of the final result is (1152(WRAM_BASE_SHAPE[0]), x), where x <= 512 (WRAM_BASE_SHAPE[1]).
+        """
+        extra_neurons = pb.ANNNeuron(n_extra_neurons, bit_trunc=15)
+        # extra_neurons = pb.ANNBypassNeuron(n_extra_neurons)
+        dest_info = _gen_random_neuron_dest_info(n_extra_neurons)
+
+        # TODO Current APIs are not enough to generate the parameters of neurons directly.
+        frame3 = OfflineFrameGen.gen_config_frame3(
+            Coord(31, 31),
+            Coord(0, 0),
+            RId(0, 0),
+            0,
+            n_extra_neurons,
+            NeuronAttrs.model_validate(extra_neurons.attrs(all=False)),
+            dest_info,
+            1,
+        )
+
+        neuron_params_214b = np.zeros(
+            (n_extra_neurons, NEURON_PARAMS_BIT_LENGTH), dtype=WRAM_UNPACKED_DTYPE
         )
 
-    def test_packbits_to_mapping_form(self, fixed_rng: np.random.Generator):
-        def _weight_ram_T(weight_ram_mapped: np.ndarray):
-            _w = weight_ram_mapped.T.reshape(-1, 64)
-            w_packed_u8 = np.packbits(_w, axis=-1, bitorder="little")
+        for i in range(n_extra_neurons):
+            # A neuron's parameters are packed in 4 single packages
+            params = frame3.packages[i * 4 : (i + 1) * 4]
+            # [0:NEURON_PARAMS_BIT_LENGTH]:LSB to MSB + [NEURON_PARAMS_BIT_LENGTH:]:0
+            neuron_params_214b[i, :] = np.unpackbits(
+                params.view(WRAM_UNPACKED_DTYPE), axis=0, bitorder="little"
+            )[:NEURON_PARAMS_BIT_LENGTH]
+
+        # Slow method
+        n_col_avail = math.ceil(
+            (_get_max_fanout(8, wp + lcn_ex) - WRAM_BASE_SHAPE[1])
+            / N_NEURON_PARAM_IN_COL
+        )
+        wram_neurons_slow = np.zeros(
+            (WRAM_BASE_SHAPE[0], n_col_avail), dtype=WRAM_UNPACKED_DTYPE
+        )
+        for i in range(n_extra_neurons):
+            idx_col, idx_in_col = divmod(i, N_NEURON_PARAM_IN_COL)
+            wram_neurons_slow[
+                idx_in_col
+                * NEURON_PARAMS_BIT_LENGTH : (idx_in_col + 1)
+                * NEURON_PARAMS_BIT_LENGTH,
+                idx_col,
+            ] = neuron_params_214b[i, :].squeeze()
+        # Slow method ends.
+
+        # Pad the row of neuron parameters to a multiple of `N_NEURON_PARAM_IN_COL`
+        if (r := neuron_params_214b.shape[0] % N_NEURON_PARAM_IN_COL) > 0:
+            neuron_params_214b = np.pad(
+                neuron_params_214b, ((0, N_NEURON_PARAM_IN_COL - r), (0, 0))
+            )
 
-            return w_packed_u8
+        n_bit_nparams = NEURON_PARAMS_BIT_LENGTH * N_NEURON_PARAM_IN_COL
 
-        w = fixed_rng.integers(-8, 8, size=(1152, 64), dtype=WEIGHT_DTYPE)
+        neuron_params_214b = neuron_params_214b.reshape((-1, n_bit_nparams))
+        _n_col_occupied = neuron_params_214b.shape[0]
 
-        # 1152 * 512
-        w1 = self._weight_ram_mapping_ref(w, 8, False, 0)
+        result = np.zeros(
+            (WRAM_BASE_SHAPE[0], _n_col_occupied), dtype=WRAM_UNPACKED_DTYPE
+        )
+        result[:n_bit_nparams] = neuron_params_214b.T
 
-        # -> 512 * 1152 -> 512 * 144 (uint8)
-        wT = _weight_ram_T(w1)
+        assert np.array_equal(result, wram_neurons_slow[:, :_n_col_occupied])
 
-        ww = wT.view(np.uint64).reshape(-1, 18)
-        ww.setflags(write=False)
-        assert 1
+        return result
 
-    def test_weight_ram_mapping_8bits(self, packbits8):
+    def test_weight_ram_mapping_8bits(self):
         binary_conn = np.zeros((6, 8 * 5), dtype=np.bool_)
         wp = WW.WEIGHT_WIDTH_8BIT
 
@@ -390,7 +775,7 @@ def test_weight_ram_mapping_8bits(self, packbits8):
 
             assert expected == r
 
-    def test_weight_ram_mapping_4bits(self, packbits4):
+    def test_weight_ram_mapping_4bits(self):
         binary_conn = np.zeros((6, 4 * 5), dtype=np.bool_)
         wp = WW.WEIGHT_WIDTH_4BIT
 
@@ -413,7 +798,7 @@ def test_weight_ram_mapping_4bits(self, packbits4):
 
             assert expected == r
 
-    def test_weight_ram_mapping_2bits(self, packbits2):
+    def test_weight_ram_mapping_2bits(self):
         binary_conn = np.zeros((6, 4 * 5), dtype=np.bool_)
         wp = WW.WEIGHT_WIDTH_2BIT
 

From 564b7570c9703cbe8a4551706c5dd10c2f5f9443 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sun, 22 Sep 2024 22:52:08 +0800
Subject: [PATCH 090/187] =?UTF-8?q?=E2=9C=A8=20support=20weight=20mapping?=
 =?UTF-8?q?=20to=20the=20WRAM=20for=20both=201/8-bit=20input=20width?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/placement.py | 127 ++++++++++++++++++------------------
 paibox/backend/types.py     |   2 +
 2 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index e3f54daa..91a0001d 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -1,3 +1,4 @@
+import math
 import warnings
 from functools import cached_property
 from typing import ClassVar, Literal, Optional, overload
@@ -7,12 +8,7 @@
 from paicorelib import WeightWidth as WW
 
 from paibox.components import FullConnectedSyn, Neuron
-from paibox.exceptions import (
-    GraphBuildError,
-    NotSupportedError,
-    ResourceError,
-    TruncationWarning,
-)
+from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning
 from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
 
@@ -23,6 +19,7 @@
     _COORD_UNSET,
     WRAM_PACKED_DTYPE,
     WRAM_UNPACKED_DTYPE,
+    N_BIT_PACKED_WEIGHT,
     AxonCoord,
     AxonSegment,
     CoreAbstract,
@@ -437,12 +434,7 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
         """Fold the weights into LCN-sized blocks."""
         w_folded_list = []
         w_folded_of_axon_segs = []
-        # See the note of function `_weight_ram_mapping` below.
-        n_fold = (
-            self.n_timeslot
-            if self.rt_mode.is_snn
-            else 1 << (self.dendrite_comb_rate - 3)
-        )
+        n_fold = self.n_timeslot
 
         if self.lcn_ex == LCN_EX.LCN_1X:
             return np.hstack(raw_weights)
@@ -473,62 +465,73 @@ def _fold_raw_weights(self, raw_weights: list[WeightType]) -> WeightType:
         return np.hstack(w_folded_list)
 
     def _weight_ram_mapping(self) -> WRAMPackedType:
-        """Map the raw weights to the weight RAM(WRAM). The mapping is different for both input widths.
-
-        NOTE: When the input width is 8 bits, no neurons need to be mapped to the WRAM when the combination rate of \
-            dentrites >= 8, while some neurons need to be mapped to the WRAM when < 8.
-
-            When the input width is 8 bits and with the combination rate of dentrites > 3, the mapping of weights   \
-            becomes the key to limiting neuron capacity. In this case, if the weight accuracy is less than 8 bits   \
-            (which may also occur when the weight accuracy is optimized), the weight cannot be folded directly in   \
-            the fan-in expansion direction, otherwise the column of the WRAM will exceed the upper limit(512).      \
+        """Map the raw weights to the weight RAM(WRAM). The mapping is different for 1 & 8-bit input widths.
 
-            A portion of the fan-in needs to be expanded to an unfilled portion in the direction of the weight      \
-            accuracy. At this point, n_fold=n_timeslot/(8/n_weight_bits)=2^(dendrite_comb_rate - 3). For example,   \
-            for LCN_8X & WW8, the n_fold is 3. For LCN_32X & WW4, the n_fold is 4 (instead of 5).
+        NOTE: When the input width is 1-bit, no neurons need to be mapped to the WRAM. When the input width is 8-bit,   \
+            some neurons may be mapped to the WRAM when the #N of neurons inside the core placement > 512.
 
-        TODO Now, in ANN mode, only the mapping of 8-bit weights is supported. The weight accuracy optimization is  \
-            supposed to disable manually for now.
+            This function was tested using only the prototype functions. For test items, please refer to                \
+            tests/backend/test_placement.py::TestWeightRamMapping for details.
         """
-        if not self.rt_mode.is_snn and self.weight_width < WW.WEIGHT_WIDTH_8BIT:
-            raise NotSupportedError("only support 8-bit weights in ANN mode.")
-
-        _weights_folded = self._fold_raw_weights(self.raw_weights)
-        row, col = _weights_folded.shape
-        # The 1152*512 unpacked weight
-        w_unpacked = np.zeros(self.WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
-
-        if self.n_weight_bits == 1:
-            w_unpacked[:row, :col] = _weights_folded
+        w_folded = self._fold_raw_weights(self.raw_weights)
+        folded_row, _ = w_folded.shape
+        # The 1152*512 unpacked weight, uint8 but only 0 & 1.
+        wram_unpacked = np.zeros(self.WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
+
+        if is_iw8(self.rt_mode):
+            # The length of slot for each bit of input data
+            iw, bit_slot_length = 8, HwConfig.N_FANIN_PER_DENDRITE_ANN
         else:
-            # (N, M)(int8) -> (M, N, 1)(uint8)
-            w_folded_3d = np.expand_dims(_weights_folded.T, axis=2).astype(
-                WRAM_UNPACKED_DTYPE
-            )
-
-            _n_group_bit = HwConfig.N_FANIN_PER_DENDRITE_ANN
+            iw, bit_slot_length = 1, HwConfig.N_FANIN_PER_DENDRITE_SNN
+
+        n_dendrite_comb = 1 << self.dendrite_comb_rate
+        # oc * e / (8/w) = oc * d / 8
+        orig_col = self.n_neuron
+        result_col = math.ceil(orig_col * n_dendrite_comb / iw)
+        # Units are divided into small blocks of columns, fan-in extension
+        cew_block = np.zeros(
+            (orig_col, self.n_timeslot, self.n_weight_bits, bit_slot_length),
+            dtype=WRAM_UNPACKED_DTYPE,
+        )
 
-            for i in range(col):
+        # (N, M)(int8) -> (M, N, 1)(uint8)
+        w_folded_3d = np.expand_dims(w_folded.T, axis=2).view(
+            WRAM_UNPACKED_DTYPE
+        )
+        for c in range(orig_col):
+            for lcn in range(self.n_timeslot):
                 # For every column, unpack the array (N, 1) -> (N, n_weight_bits)
                 unpacked = np.unpackbits(
-                    w_folded_3d[i],
+                    w_folded_3d[c * self.n_timeslot + lcn, :, :],
                     axis=1,
                     count=self.n_weight_bits,
-                    bitorder=HwConfig.WEIGHT_BITORDER,
+                    bitorder="little",
                 )
 
-                if self.rt_mode.is_snn:
-                    w_unpacked[
-                        :row, self.n_weight_bits * i : self.n_weight_bits * (i + 1)
-                    ] = unpacked
-                else:
-                    # In the case of 8-bit input width, the weights are mapped differently
-                    for bit in range(self.n_weight_bits):
-                        w_unpacked[bit * _n_group_bit : bit * _n_group_bit + row, i] = (
-                            unpacked[:, bit]
-                        )
+                for bit in range(self.n_weight_bits):
+                    cew_block[c, lcn, bit, :folded_row] = unpacked[:, bit].squeeze()
+
+        if n_dendrite_comb >= iw:  # For 1-bit input width, it must go into this case
+            # At least 1 fan-in is required to be combined in one column
+            w_mapped = cew_block.reshape((result_col, -1)).T
+        else:
+            # 2/4/8 original columns are combined in one column
+            n_col_comb_in_col = iw // n_dendrite_comb
+            cew_block = cew_block.reshape((orig_col, -1))
+
+            if (r := orig_col % n_col_comb_in_col) > 0:
+                cew_block = np.pad(cew_block, ((0, n_col_comb_in_col - r), (0, 0)))
+
+            # Now, length of padded columns is a multiple of 'n_col_comb_in_col'
+            w_mapped = cew_block.reshape(
+                (cew_block.shape[0] // n_col_comb_in_col, -1)
+            ).T
+
+        # For 8-bit input width, here is only the weight mapped to the WRAM. Extra neurons
+        # parameters will be mapped to the WRAM when exporting the configuration frames.
+        wram_unpacked[:, : w_mapped.shape[1]] = w_mapped
 
-        return self._weight_pack(w_unpacked)
+        return self._weight_pack(wram_unpacked)
 
     @staticmethod
     def _nfold_weight(
@@ -543,11 +546,10 @@ def _nfold_weight(
         """
         raw_row, raw_col = raw_weight.shape
 
-        if raw_row % n_fold > 0:
-            n_row_padding = n_fold - raw_row % n_fold
+        if (r := raw_row % n_fold) > 0:
             _raw_weight = np.append(
                 raw_weight,
-                np.zeros((n_row_padding, raw_col), dtype=WEIGHT_DTYPE),
+                np.zeros((n_fold - r, raw_col), dtype=WEIGHT_DTYPE),
                 axis=0,
             )
         else:
@@ -568,18 +570,17 @@ def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
             contains 18 uint64.
             (1152, 512) -> T -> (512*18, 64) -> (512*18, 8) uint8 -> (512*18, 1) uint64 -> (512, 18) uint64.
         """
-        _n_bit_packed = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bit of packed dtype
         # #N of u64 on each NRAM address
-        _n_u64_naddr = CorePlacement.WRAM_BASE_SHAPE[0] // _n_bit_packed
+        _n_u64_naddr = CorePlacement.WRAM_BASE_SHAPE[0] // N_BIT_PACKED_WEIGHT
 
         # Reshape to 64 columns to avoid contiguous problem.
-        w_unpacked_aligned = w_unpacked.T.reshape(-1, _n_bit_packed)
+        w_unpacked_aligned = w_unpacked.T.reshape((-1, N_BIT_PACKED_WEIGHT))
         # (512*18, 64) uint8 -> (512*18, 8) uint8
         w_packed_u8 = np.packbits(
             w_unpacked_aligned, axis=1, bitorder=HwConfig.WEIGHT_BITORDER
         )
         # (512*18, 8) uint8 -> (512*18, 1) uint64 -> (512, 18) uint64
-        w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape(-1, _n_u64_naddr)
+        w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape((-1, _n_u64_naddr))
         w_packed_u64.setflags(write=False)
 
         return w_packed_u64
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index bed63c20..f4a77780 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -51,6 +51,8 @@
 WRAMUnpackedType: TypeAlias = NDArray[WRAM_UNPACKED_DTYPE]
 # Type of packed weight in WRAM
 WRAMPackedType: TypeAlias = NDArray[WRAM_PACKED_DTYPE]
+N_BIT_PACKED_WEIGHT = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bits of packed weight
+
 _COORD_UNSET = 0
 _DEGREE_UNSET = -1
 

From 8acaa2f96eb5763fa31d5f6a20400c17b4affd26 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 23 Sep 2024 09:49:57 +0800
Subject: [PATCH 091/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactoring=20the?=
 =?UTF-8?q?=20test=20fixtures=20to=20simplify=20the=20test=20setup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/conftest.py           | 187 +---------------------------
 tests/backend/test_conf_template.py | 175 +++++++++++++++++++++++---
 2 files changed, 156 insertions(+), 206 deletions(-)

diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index 48387125..d6bfcb30 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -1,32 +1,12 @@
 import random
-from functools import partial
 from typing import Optional
 
 import numpy as np
 import pytest
-from paicorelib import (
-    LCN_EX,
-    Coord,
-    CoordOffset,
-    CoreMode,
-    HwConfig,
-    MaxPoolingEnable,
-    RoutingCoord,
-    RoutingDirection,
-    RoutingLevel,
-)
+from paicorelib import LCN_EX, RoutingCoord, RoutingDirection, RoutingLevel
 from paicorelib import WeightWidth as WW
-from paicorelib.reg_model import TICK_WAIT_END_MAX, TICK_WAIT_START_MAX
 
 import paibox as pb
-from paibox.backend.conf_template import (
-    CoreConfig,
-    CorePlmConfig,
-    InputNeuronDest,
-    NeuronConfig,
-    NeuronDest,
-    NeuronDestInfo,
-)
 from paibox.backend.routing import RoutingCluster
 from paibox.backend.types import AxonCoord, AxonSegment, NeuSegment
 from paibox.exceptions import ResourceError
@@ -809,171 +789,6 @@ def get_mapper() -> pb.Mapper:
     return pb.Mapper()
 
 
-@pytest.fixture
-def MockCoreConfigDict() -> CoreConfig:
-    wp = random.choice(list(WW))
-    lcn_ex = random.choice(list(LCN_EX))
-
-    iwf, swf, sme = random.choice(list(CoreMode)).conf
-
-    num_den = random.randint(1, HwConfig.N_DENDRITE_MAX_SNN)
-    mpe = random.choice(list(MaxPoolingEnable))
-    tws = random.randint(0, TICK_WAIT_START_MAX)
-    twe = random.randint(0, TICK_WAIT_END_MAX)
-    target_lcn = random.choice(list(LCN_EX))
-    test_chip_addr = Coord(random.randint(0, 31), random.randint(0, 31))
-
-    return CoreConfig(
-        "mock_core",
-        wp,
-        lcn_ex,
-        iwf,
-        swf,
-        num_den,
-        mpe,
-        tws,
-        twe,
-        sme,
-        target_lcn,
-        test_chip_addr,
-    )
-
-
-@pytest.fixture
-def MockNeuronConfig() -> NeuronConfig:
-    n_channel = 3
-    _n_per_ch = random.randint(20, 100)
-    n = n_channel * _n_per_ch
-    offset = random.randint(1, 100)
-    interval = random.randint(1, 2)
-    thres = random.randint(1, 5)
-    reset_v = random.randint(-5, 5)
-    leak_v = np.arange(n_channel * n).reshape((n_channel, n))
-    neuron = pb.LIF((n_channel, n), thres, reset_v, bias=leak_v, keep_shape=True)
-    dest_coord_start = Coord(random.randint(0, 10), random.randint(0, 10))
-    test_chip_addr = Coord(random.randint(0, 31), random.randint(0, 31))
-
-    _n_start = random.randint(0, 20)
-    nseg = NeuSegment(
-        neuron, slice(_n_start, 1 * _n_per_ch + _n_start), offset, interval
-    )
-
-    axon_coords = [AxonCoord(0, i) for i in range(nseg.n_neuron)]
-    dest_coords = [dest_coord_start, dest_coord_start + CoordOffset(0, 1)]
-    pb.BACKEND_CONFIG.test_chip_addr = test_chip_addr
-
-    return NeuronConfig.encapsulate(
-        nseg, axon_coords, dest_coords, pb.BACKEND_CONFIG.test_chip_addr
-    )
-
-
-@pytest.fixture
-def MockNeuronDestInfo(MockNeuronConfig) -> NeuronDestInfo:
-    return MockNeuronConfig.neuron_dest_info
-
-
-@pytest.fixture
-def MockNeuronDest() -> NeuronDest:
-    n = random.randint(100, 1000)
-    tick_relative = [0 for _ in range(n)]
-    addr_axon = [i for i in range(n)]
-
-    addr_core_x = random.randint(0, 31)
-    addr_core_y = random.randint(0, 31)
-    addr_core_x_ex = random.randint(0, 31)
-    addr_core_y_ex = random.randint(0, 31)
-    addr_chip_x = random.randint(0, 31)
-    addr_chip_y = random.randint(0, 31)
-
-    return NeuronDest(
-        tick_relative,
-        addr_axon,
-        addr_core_x,
-        addr_core_y,
-        addr_core_x_ex,
-        addr_core_y_ex,
-        addr_chip_x,
-        addr_chip_y,
-    )
-
-
-@pytest.fixture
-def MockInputNeuronDest():
-    n = random.randint(100, 1000)
-    tick_relative = [0 for _ in range(n)]
-    addr_axon = [i for i in range(n)]
-
-    addr_core_x = random.randint(0, 31)
-    addr_core_y = random.randint(0, 31)
-    addr_core_x_ex = random.randint(0, 31)
-    addr_core_y_ex = random.randint(0, 31)
-    addr_chip_x = random.randint(0, 31)
-    addr_chip_y = random.randint(0, 31)
-    lcn = 1 << random.choice(list(LCN_EX))
-
-    return InputNeuronDest(
-        tick_relative,
-        addr_axon,
-        addr_core_x,
-        addr_core_y,
-        addr_core_x_ex,
-        addr_core_y_ex,
-        addr_chip_x,
-        addr_chip_y,
-        lcn,
-    )
-
-
-@pytest.fixture
-def MockCorePlmConfig(MockCoreConfigDict, MockNeuronConfig):
-    n = random.randint(100, 400)
-    thres = random.randint(1, 5)
-    reset_v = random.randint(-5, 5)
-    neuron = pb.IF((n,), thres, reset_v)
-
-    cpc = CorePlmConfig.encapsulate(
-        random.randint(0, 1000),
-        np.random.randint(0, 100, size=(1152, 512), dtype=np.uint64),
-        MockCoreConfigDict,
-        {neuron: MockNeuronConfig},
-    )
-
-    return cpc
-
-
-def packbits_ref(bits: np.ndarray, count: int) -> int:
-    """Pack unsigned bits into a signed integer.
-
-    This is a test of the prototype of the original function.
-    """
-    _bits = np.append(bits[: count - 1], bits[-1])
-
-    result = sum(bit << i for i, bit in enumerate(_bits))
-    result -= _bits[-1] << count
-
-    return result
-
-
-@pytest.fixture
-def packbits8():
-    return partial(packbits_ref, count=8)
-
-
-@pytest.fixture
-def packbits4():
-    return partial(packbits_ref, count=4)
-
-
-@pytest.fixture
-def packbits2():
-    return partial(packbits_ref, count=2)
-
-
-@pytest.fixture
-def packbits1():
-    return partial(packbits_ref, count=1)
-
-
 def n_axon2lcn_ex_proto(n_axon, n_fanin_max) -> LCN_EX:
     if n_axon < 1:
         raise ValueError
diff --git a/tests/backend/test_conf_template.py b/tests/backend/test_conf_template.py
index b7b8e3c1..813836c1 100644
--- a/tests/backend/test_conf_template.py
+++ b/tests/backend/test_conf_template.py
@@ -1,9 +1,16 @@
 import random
-
-from paicorelib import Coord, HwConfig
-
+import numpy as np
+import pytest
 import paibox as pb
+
+from paicorelib import Coord, CoordOffset, CoreMode, HwConfig, LCN_EX, MaxPoolingEnable
+from paicorelib import WeightWidth as WW
 from paibox.backend.conf_template import (
+    CoreConfig,
+    CorePlmConfig,
+    InputNeuronDest,
+    NeuronConfig,
+    NeuronDestInfo,
     export_core_params_json,
     export_core_plm_conf_json,
     export_input_conf_json,
@@ -11,47 +18,175 @@
     export_output_conf_json,
     export_used_L2_clusters,
 )
+from paibox.backend.types import AxonCoord, NeuSegment
+from paicorelib.reg_model import TICK_WAIT_END_MAX, TICK_WAIT_START_MAX
 
 try:
     import orjson as json
-
-    print("Use orjson")
 except ModuleNotFoundError:
     import json
 
-    print("Use json")
-
 
-class TestConfExport:
-    def test_export_core_params_json(self, ensure_dump_dir, MockCoreConfigDict):
+def _gen_random_core_config() -> CoreConfig:
+    wp = random.choice(list(WW))
+    lcn_ex = random.choice(list(LCN_EX))
+
+    iwf, swf, sme = random.choice(list(CoreMode)).conf
+
+    num_den = random.randint(1, HwConfig.N_DENDRITE_MAX_SNN)
+    mpe = random.choice(list(MaxPoolingEnable))
+    tws = random.randint(0, TICK_WAIT_START_MAX)
+    twe = random.randint(0, TICK_WAIT_END_MAX)
+    target_lcn = random.choice(list(LCN_EX))
+    test_chip_addr = Coord(random.randint(0, 31), random.randint(0, 31))
+
+    return CoreConfig(
+        "mock_core",
+        wp,
+        lcn_ex,
+        iwf,
+        swf,
+        num_den,
+        mpe,
+        tws,
+        twe,
+        sme,
+        target_lcn,
+        test_chip_addr,
+    )
+
+
+def _gen_random_neuron_config(n_per_channel: int, n_channel: int = 3) -> NeuronConfig:
+    n = n_channel * n_per_channel
+    offset = random.randint(1, 20)
+    interval = random.randint(1, 2)
+    thres = random.randint(1, 5)
+    reset_v = random.randint(-5, 5)
+    leak_v = np.arange(n_channel * n).reshape((n_channel, n))
+    neuron = pb.LIF((n_channel, n), thres, reset_v, bias=leak_v, keep_shape=True)
+    dest_coord_start = Coord(random.randint(0, 10), random.randint(0, 10))
+    test_chip_addr = Coord(random.randint(0, 31), random.randint(0, 31))
+
+    _n_start = random.randint(0, 10)
+    nseg = NeuSegment(
+        neuron, slice(_n_start, 1 * n_per_channel + _n_start), offset, interval
+    )
+
+    axon_coords = [AxonCoord(0, i) for i in range(nseg.n_neuron)]
+    dest_coords = [dest_coord_start, dest_coord_start + CoordOffset(0, 1)]
+    pb.BACKEND_CONFIG.test_chip_addr = test_chip_addr
+
+    return NeuronConfig.encapsulate(
+        nseg, axon_coords, dest_coords, pb.BACKEND_CONFIG.test_chip_addr
+    )
+
+
+def _gen_random_neuron_dest_info(n: int) -> NeuronDestInfo:
+    tick_relative = [0 for _ in range(n)]
+    addr_axon = [i for i in range(n)]
+
+    addr_core_x = random.randint(0, 31)
+    addr_core_y = random.randint(0, 31)
+    addr_core_x_ex = random.randint(0, 31)
+    addr_core_y_ex = random.randint(0, 31)
+    addr_chip_x = random.randint(0, 31)
+    addr_chip_y = random.randint(0, 31)
+
+    dest_info = {
+        "tick_relative": tick_relative,
+        "addr_axon": addr_axon,
+        "addr_core_x": addr_core_x,
+        "addr_core_y": addr_core_y,
+        "addr_core_x_ex": addr_core_x_ex,
+        "addr_core_y_ex": addr_core_y_ex,
+        "addr_chip_x": addr_chip_x,
+        "addr_chip_y": addr_chip_y,
+    }
+
+    return NeuronDestInfo.model_validate(dest_info, strict=True)
+
+
+def _gen_input_neuron_dest(n: int) -> InputNeuronDest:
+    tick_relative = [0 for _ in range(n)]
+    addr_axon = [i for i in range(n)]
+
+    addr_core_x = random.randint(0, 31)
+    addr_core_y = random.randint(0, 31)
+    addr_core_x_ex = random.randint(0, 31)
+    addr_core_y_ex = random.randint(0, 31)
+    addr_chip_x = random.randint(0, 31)
+    addr_chip_y = random.randint(0, 31)
+    lcn = 1 << random.choice(list(LCN_EX))
+
+    return InputNeuronDest(
+        tick_relative,
+        addr_axon,
+        addr_core_x,
+        addr_core_y,
+        addr_core_x_ex,
+        addr_core_y_ex,
+        addr_chip_x,
+        addr_chip_y,
+        lcn,
+    )
+
+
+def _gen_random_core_plm_config(n_neuron: int) -> CorePlmConfig:
+    thres = random.randint(1, 5)
+    reset_v = random.randint(-5, 5)
+    neuron = pb.IF((n_neuron,), thres, reset_v)
+
+    cpc = CorePlmConfig.encapsulate(
+        random.randint(0, 1000),
+        np.random.randint(
+            np.iinfo(np.uint64).min,
+            np.iinfo(np.uint64).max,
+            size=(512, 18),
+            dtype=np.uint64,
+        ),
+        _gen_random_core_config(),
+        {neuron: _gen_random_neuron_config(n_neuron, 1)},
+    )
+
+    return cpc
+
+
+class TestConfExporting:
+    def test_export_core_params_json(self, ensure_dump_dir):
         core_params = {
             Coord(1, 1): {
-                Coord(0, 0): MockCoreConfigDict,
-                Coord(0, 1): MockCoreConfigDict,
+                Coord(0, 0): _gen_random_core_config(),
+                Coord(0, 1): _gen_random_core_config(),
             },
-            Coord(2, 2): {Coord(0, 0): MockCoreConfigDict},
+            Coord(2, 2): {Coord(0, 0): _gen_random_core_config()},
         }
 
         export_core_params_json(core_params, ensure_dump_dir)
 
-    def test_NeuronConfig_conf_json(self, ensure_dump_dir, MockNeuronConfig):
-        nconf = MockNeuronConfig
+    @pytest.mark.parametrize("n_per_channel, n_channel", [(100, 3), (200, 2), (240, 1)])
+    def test_NeuronConfig_conf_json(self, ensure_dump_dir, n_per_channel, n_channel):
+        nconf = _gen_random_neuron_config(n_per_channel, n_channel)
         mock_n = pb.IF(1, 1)
         export_neuconf_json({mock_n: nconf}, ensure_dump_dir)
 
-    def test_export_input_conf_json(self, ensure_dump_dir, MockInputNeuronDest):
-        iconf = {"n1": MockInputNeuronDest}
+    @pytest.mark.parametrize("n_neuron", [100, 200, 300])
+    def test_export_input_conf_json(self, ensure_dump_dir, n_neuron):
+        iconf = {"n1": _gen_input_neuron_dest(n_neuron)}
         export_input_conf_json(iconf, ensure_dump_dir)
 
-    def test_export_output_conf_json(self, ensure_dump_dir, MockNeuronDestInfo):
-        oconf = {"n1": {0: MockNeuronDestInfo}}
+    @pytest.mark.parametrize("n_neuron", [100, 200, 300])
+    def test_export_output_conf_json(self, ensure_dump_dir, n_neuron):
+        oconf = {"n1": {0: _gen_random_neuron_dest_info(n_neuron)}}
         export_output_conf_json(oconf, ensure_dump_dir)
 
-    def test_export_core_plm_conf_json(self, ensure_dump_dir, MockCorePlmConfig):
+    @pytest.mark.parametrize("n_neuron", [100, 200, 300])
+    def test_export_core_plm_conf_json(self, ensure_dump_dir, n_neuron):
         chip_coord = Coord(1, 1)
         core_coord = Coord(10, 10)
 
-        core_plm_conf = {chip_coord: {core_coord: MockCorePlmConfig}}
+        core_plm_conf = {
+            chip_coord: {core_coord: _gen_random_core_plm_config(n_neuron)}
+        }
         export_core_plm_conf_json(core_plm_conf, ensure_dump_dir)
 
         with open(ensure_dump_dir / "core_plm.json", "rb") as f:

From 59294d545eae3c0552bccb3b871eeaaf4ba55fd7 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 26 Sep 2024 18:55:54 +0800
Subject: [PATCH 092/187] =?UTF-8?q?=E2=9C=A8=20return=20the=20packed=20wei?=
 =?UTF-8?q?ght=20mapped=20to=20the=20WRAM=20with=20shape=20(x,=2018),=20wh?=
 =?UTF-8?q?ere=20x=20<=3D=20512,=20instead=20of=20(512,=2018)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/placement.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 91a0001d..653905b9 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -472,11 +472,14 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
 
             This function was tested using only the prototype functions. For test items, please refer to                \
             tests/backend/test_placement.py::TestWeightRamMapping for details.
+
+        Return:
+            The packed matrix of weights mapped to the WRAM, with shape (x, 18) (x <= 512).
         """
         w_folded = self._fold_raw_weights(self.raw_weights)
         folded_row, _ = w_folded.shape
         # The 1152*512 unpacked weight, uint8 but only 0 & 1.
-        wram_unpacked = np.zeros(self.WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
+        # wram_unpacked = np.zeros(self.WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
 
         if is_iw8(self.rt_mode):
             # The length of slot for each bit of input data
@@ -495,9 +498,7 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
         )
 
         # (N, M)(int8) -> (M, N, 1)(uint8)
-        w_folded_3d = np.expand_dims(w_folded.T, axis=2).view(
-            WRAM_UNPACKED_DTYPE
-        )
+        w_folded_3d = np.expand_dims(w_folded.T, axis=2).view(WRAM_UNPACKED_DTYPE)
         for c in range(orig_col):
             for lcn in range(self.n_timeslot):
                 # For every column, unpack the array (N, 1) -> (N, n_weight_bits)
@@ -529,9 +530,10 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
 
         # For 8-bit input width, here is only the weight mapped to the WRAM. Extra neurons
         # paramaters will be mapped to the WRAM when exporting the configuration frames.
-        wram_unpacked[:, : w_mapped.shape[1]] = w_mapped
+        # wram_unpacked[:, : w_mapped.shape[1]] = w_mapped
 
-        return self._weight_pack(wram_unpacked)
+        # `w_mapped` is only the weight mapped to the WRAM. The shape[1] of `w_mapped` <= 512.
+        return self._weight_pack(w_mapped)
 
     @staticmethod
     def _nfold_weight(
@@ -566,20 +568,20 @@ def _nfold_weight(
 
     @staticmethod
     def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
-        """Convert the unpacked weights into a mapping format, corresponding to the WRAM address, each address      \
-            contains 18 uint64.
-            (1152, 512) -> T -> (512*18, 64) -> (512*18, 8) uint8 -> (512*18, 1) uint64 -> (512, 18) uint64.
+        """Convert the unpacked weights into a mapping format, corresponding to the WRAM address, each address contains \
+            18 uint64.
+            (1152, x) -> (x, 1152) -> (x*18, 64) -> (x*18, 8) uint8 -> (x*18, 1) uint64 -> (x, 18) uint64.
         """
         # #N of u64 on each NRAM address
         _n_u64_naddr = CorePlacement.WRAM_BASE_SHAPE[0] // N_BIT_PACKED_WEIGHT
 
         # Reshape to 64 columns to avoid contiguous problem.
         w_unpacked_aligned = w_unpacked.T.reshape((-1, N_BIT_PACKED_WEIGHT))
-        # (512*18, 64) uint8 -> (512*18, 8) uint8
+        # (x*18, 64) uint8 -> (x*18, 8) uint8
         w_packed_u8 = np.packbits(
             w_unpacked_aligned, axis=1, bitorder=HwConfig.WEIGHT_BITORDER
         )
-        # (512*18, 8) uint8 -> (512*18, 1) uint64 -> (512, 18) uint64
+        # (x*18, 8) uint8 -> (x*18, 1) uint64 -> (x, 18) uint64
         w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape((-1, _n_u64_naddr))
         w_packed_u64.setflags(write=False)
 

From 0fc38d691c1e3d09c520064a9a05340d2d0af1e0 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 26 Sep 2024 19:01:52 +0800
Subject: [PATCH 093/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20add=20`=5F=5Fgetit?=
 =?UTF-8?q?em=5F=5F`=20for=20`NeuSegment`=20&=20change=20some=20properties?=
 =?UTF-8?q?=20inside?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/segment_utils.py |  4 +-
 paibox/backend/types.py         | 73 ++++++++++++++++++++++++---------
 2 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/paibox/backend/segment_utils.py b/paibox/backend/segment_utils.py
index d5b78d2c..bcc492bf 100644
--- a/paibox/backend/segment_utils.py
+++ b/paibox/backend/segment_utils.py
@@ -134,7 +134,7 @@ def backtrack(i: int, cur_addr_offset: int, taken: NeuSegOfCorePlm) -> None:
                     repl_prop,
                 )
             )
-            cur_addr_offset += neu_segs_not_full[n_cur_reg].n_addr
+            cur_addr_offset += neu_segs_not_full[n_cur_reg].n_occupied_addr
             cur_n_neuron += neu_segs_not_full[n_cur_reg].n_neuron
             n_cur_reg += 1
 
@@ -205,7 +205,7 @@ def _find_neu_in_segs_of_cplm(neu: Neuron, seg_of_cplm: NeuSegOfCorePlm) -> bool
             require_new_cplm = True
 
             for seg_of_cplm in neu_segs_of_cb:
-                cur_addr_offset = sum([seg.n_addr for seg in seg_of_cplm])
+                cur_addr_offset = sum([seg.n_occupied_addr for seg in seg_of_cplm])
                 cur_n_neuron = sum([seg.n_neuron for seg in seg_of_cplm])
 
                 # Available to place & insert for the first time
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index f4a77780..a19ed15f 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -2,7 +2,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum, auto, unique
-from typing import NamedTuple, Union
+from typing import Any, NamedTuple, Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -110,37 +110,70 @@ class NeuSegment:
     offset: int
     repeat: int = 1
 
+    def __getitem__(self, s: slice) -> "NeuSegment":
+        # if isinstance(idx, int):
+        #     if idx < 0:
+        #         _idx = self.n_neuron + idx
+        #         if _idx < 0:
+        #             raise ValueError(f"index out of range: {idx} < 0")
+        #     else:
+        #         _idx = idx
+        #         if _idx > self.n_neuron - 1:
+        #             raise ValueError(f"index out of range: {idx} > {self.n_neuron-1}")
+
+        #     start = self.index.start + _idx
+        #     end = start + 1
+
+        #     return NeuSegment(
+        #         self.target,
+        #         NeuSlice(start, end, self.index.step),
+        #         self.offset + idx,
+        #         self.repeat,
+        #     )
+        _idx_start = s.start if s.start is not None else 0
+        if s.stop is None:
+            _idx_stop = self.n_neuron
+        elif s.stop < 0:
+            _idx_stop = self.n_neuron + s.stop
+        else:
+            _idx_stop = s.stop
+
+        if (_n_idx := _idx_stop - _idx_start) > self.n_neuron:
+            raise IndexError(f"index out of range: {_n_idx} > {self.n_neuron}")
+
+        start = self.index.start + _idx_start
+        end = self.index.start + _idx_stop
+
+        return NeuSegment(
+            self.target,
+            NeuSlice(start, end, self.index.step),
+            self.offset + _idx_start,
+            self.repeat,
+        )
+
     @property
     def n_neuron(self) -> int:
+        """#N of unique neurons in this segment."""
         return self.index.stop - self.index.start
 
     @property
-    def n_addr(self) -> int:
+    def n_occupied_addr(self) -> int:
+        """#N of neuron addresses the segment occupies in the RAM."""
         return self.repeat * self.n_neuron
 
+    @property
+    def attrs(self) -> dict[str, Any]:
+        return self.target._slice_attrs(self.index)
+    
     @property
     def addr_ram(self) -> list[int]:
         """Convert index of neuron into RAM address."""
-        return list(range(self.offset, self.addr_max, 1))
-
-    @property
-    def addr_max(self) -> int:
-        if (
-            _addr_max := self.offset + self.repeat * self.n_neuron
-        ) > HwConfig.ADDR_RAM_MAX + 1:
-            raise ValueError(
-                f"neuron RAM address out of range {HwConfig.ADDR_RAM_MAX + 1} ({_addr_max})."
-            )
-
-        return _addr_max
+        return list(range(self.offset, self.offset + self.n_occupied_addr, 1))
 
     @property
-    def addr_slice(self) -> slice:
-        """Display the RAM address in slice format."""
-        return slice(self.offset, self.addr_max, self.repeat)
-
-    def __str__(self) -> str:
-        return f"NeuSeg {self.target.name} at offset {self.offset}"
+    def _addr_ram_repr(self) -> slice:
+        """Represent the slice of neuron RAM address."""
+        return slice(self.offset, self.offset + self.n_occupied_addr, self.repeat)
 
 
 NeuSegOfCorePlm: TypeAlias = list[NeuSegment]

From 35f1a8d125cbcdbb79ca4b7745e59bd6937ea1d3 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 26 Sep 2024 19:07:00 +0800
Subject: [PATCH 094/187] =?UTF-8?q?=E2=9C=A8=20add=20static=20method=20`ne?=
 =?UTF-8?q?u=5Fparams=5Fmapping`=20in=20CP=20for=20neurons=20parameters=20?=
 =?UTF-8?q?mapping=20to=20the=20WRAM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/placement.py | 75 ++++++++++++++++++++++++++++++++++---
 paibox/backend/types.py     |  9 +++--
 2 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 653905b9..26d3124c 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -6,6 +6,7 @@
 import numpy as np
 from paicorelib import LCN_EX, ChipCoord, Coord, CoreMode, HwConfig, MaxPoolingEnable
 from paicorelib import WeightWidth as WW
+from paicorelib.framelib import OfflineFrameGen
 
 from paibox.components import FullConnectedSyn, Neuron
 from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning
@@ -17,6 +18,7 @@
 from .segment_utils import aligned_coords, get_axon_segments, get_neu_segments
 from .types import (
     _COORD_UNSET,
+    _RID_UNSET,
     WRAM_PACKED_DTYPE,
     WRAM_UNPACKED_DTYPE,
     N_BIT_PACKED_WEIGHT,
@@ -85,7 +87,7 @@ def __init__(
         self.target_lcn = LCN_EX.LCN_1X
         self._lcn_locked = False
         self.core_coords = []
-        self.chip_coord = Coord(_COORD_UNSET, _COORD_UNSET)
+        self.chip_coord = _COORD_UNSET
         self.core_placements = dict()
         self.axon_segments = dict()
         self.neuron_segs_of_cb = []
@@ -572,9 +574,6 @@ def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
             18 uint64.
             (1152, x) -> (x, 1152) -> (x*18, 64) -> (x*18, 8) uint8 -> (x*18, 1) uint64 -> (x, 18) uint64.
         """
-        # #N of u64 on each NRAM address
-        _n_u64_naddr = CorePlacement.WRAM_BASE_SHAPE[0] // N_BIT_PACKED_WEIGHT
-
         # Reshape to 64 columns to avoid contiguous problem.
         w_unpacked_aligned = w_unpacked.T.reshape((-1, N_BIT_PACKED_WEIGHT))
         # (x*18, 64) uint8 -> (x*18, 8) uint8
@@ -582,11 +581,74 @@ def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
             w_unpacked_aligned, axis=1, bitorder=HwConfig.WEIGHT_BITORDER
         )
         # (x*18, 8) uint8 -> (x*18, 1) uint64 -> (x, 18) uint64
-        w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape((-1, _n_u64_naddr))
+        w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape(
+            (w_unpacked.shape[1], -1)
+        )
         w_packed_u64.setflags(write=False)
 
         return w_packed_u64
 
+    @staticmethod
+    def neu_params_mapping(neu_confs: list[NeuronConfig]) -> WRAMPackedType:
+        """Map the extra neurons parameters to the WRAM. This only happens when the input width is 8 bits.
+
+        NOTE: This function was tested using only the prototype functions. For test items, please refer to              \
+            `tests/backend/test_placement.py::TestWeightRamMapping` for details.
+
+        Return:
+            The packed matrix of extra neurons parameters mapped to the WRAM, with shape (x, 18) (x <= 512).
+        """
+        neu_conf_params_list: list[WRAMUnpackedType] = []
+
+        for neu_conf in neu_confs:
+            neu_conf_params = np.zeros(
+                (neu_conf.neu_seg.n_neuron, NEURON_PARAMS_BIT_LENGTH),
+                dtype=WRAM_UNPACKED_DTYPE,
+            )
+
+            # Only the packages will be used.
+            frame3 = OfflineFrameGen.gen_config_frame3(
+                _COORD_UNSET,
+                _COORD_UNSET,
+                _RID_UNSET,
+                0,
+                neu_conf.neu_seg.n_neuron,
+                neu_conf.neuron_attrs,
+                neu_conf.neuron_dest_info,
+                1,
+            )
+
+            for i in range(neu_conf.neu_seg.n_neuron):
+                params = frame3.packages[i * 4 : (i + 1) * 4]
+                neu_conf_params[i, :] = np.unpackbits(
+                    params.view(WRAM_UNPACKED_DTYPE), axis=0, bitorder="little"
+                )[:NEURON_PARAMS_BIT_LENGTH]
+
+            neu_conf_params_list.append(neu_conf_params)
+
+        neu_params = np.vstack(neu_conf_params_list)
+
+        N_NEURON_PARAM_IN_COL = (
+            CorePlacement.WRAM_BASE_SHAPE[0] // NEURON_PARAMS_BIT_LENGTH
+        )
+        n_col_occupied, r = divmod(neu_params.shape[0], N_NEURON_PARAM_IN_COL)
+        if r > 0:
+            n_col_occupied += 1
+            neu_params = np.pad(neu_params, ((0, N_NEURON_PARAM_IN_COL - r), (0, 0)))
+
+        neu_params = neu_params.reshape((n_col_occupied, -1))
+
+        # (1152, y)
+        result = np.zeros(
+            (CorePlacement.WRAM_BASE_SHAPE[0], n_col_occupied),
+            dtype=WRAM_UNPACKED_DTYPE,
+        )
+        _n_bit_nparams = NEURON_PARAMS_BIT_LENGTH * N_NEURON_PARAM_IN_COL
+        result[:_n_bit_nparams] = neu_params.T
+
+        # (1152, y) -> (y, 18)
+        return CorePlacement._weight_pack(result)
+
     def export_param_config(self) -> CoreConfig:
         _mode_params = self.rt_mode.conf
 
@@ -807,3 +869,6 @@ def max_lcn_of_cb(cb: list[CoreBlock]) -> LCN_EX:
     FANOUT_IW8 = HwConfig.FANOUT_IW8  # type: ignore
 else:
     FANOUT_IW8 = [HwConfig.N_NEURON_MAX_ANN, 1364, 876, 512, 256, 128, 64, 32, 16, 8]
+
+
+NEURON_PARAMS_BIT_LENGTH = 214
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index a19ed15f..3ca72075 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -12,8 +12,8 @@
 else:
     from typing_extensions import TypeAlias
 
-from paicorelib import CoreMode, HwConfig
-
+from paicorelib import Coord, CoreMode
+from paicorelib import ReplicationId as RId
 from paibox.base import PAIBoxObject
 from paibox.components import FullConnectedSyn, InputProj, Neuron
 
@@ -53,7 +53,8 @@
 WRAMPackedType: TypeAlias = NDArray[WRAM_PACKED_DTYPE]
 N_BIT_PACKED_WEIGHT = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bits of packed weight
 
-_COORD_UNSET = 0
+_COORD_UNSET = Coord(0, 0)
+_RID_UNSET = RId(0, 0)
 _DEGREE_UNSET = -1
 
 
@@ -164,7 +165,7 @@ def n_occupied_addr(self) -> int:
     @property
     def attrs(self) -> dict[str, Any]:
         return self.target._slice_attrs(self.index)
-    
+
     @property
     def addr_ram(self) -> list[int]:
         """Convert index of neuron into RAM address."""

From ec81928b5a2af662eb8aa186d1259dcac724144e Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 26 Sep 2024 20:05:56 +0800
Subject: [PATCH 095/187] =?UTF-8?q?=E2=9C=A8=20move=20config=20exporting?=
 =?UTF-8?q?=20functions=20to=20`conf=5Fexporting.py`.=20Support=20exportin?=
 =?UTF-8?q?g=20neurons=20mapped=20to=20the=20WRAM.=20=20Some=20APIs=20chan?=
 =?UTF-8?q?ged?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_exporting.py | 357 +++++++++++++++++++++++++++
 paibox/backend/conf_template.py  | 411 ++++---------------------------
 paibox/backend/mapper.py         |  11 +-
 paibox/backend/placement.py      |   9 +-
 4 files changed, 412 insertions(+), 376 deletions(-)
 create mode 100644 paibox/backend/conf_exporting.py

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
new file mode 100644
index 00000000..0f0adc40
--- /dev/null
+++ b/paibox/backend/conf_exporting.py
@@ -0,0 +1,357 @@
+import sys
+from collections import defaultdict
+from collections.abc import Sequence
+from pathlib import Path
+
+import numpy as np
+from paicorelib import ChipCoord, HwConfig, RoutingCoord
+from paicorelib.framelib import OfflineFrameGen
+from paicorelib.framelib.utils import _mask, np2bin, np2npy, np2txt
+
+from .placement import CorePlacement
+from paibox.components import Neuron
+from paibox.utils import reverse_8bit
+
+from .conf_template import (
+    _USE_ORJSON,
+    FRAME_DTYPE,
+    CoreConf,
+    CorePlmConf,
+    FrameArrayType,
+    InputNodeConf,
+    NeuronConfig,
+    OutputDestConf,
+)
+from .context import _BACKEND_CONTEXT
+from .types import _RID_UNSET
+
+if _USE_ORJSON:
+    from .conf_template import PAIConfigJsonDefault
+    import orjson
+else:
+    from .conf_template import PAIConfigJsonEncoder
+    import json
+
+
+__all__ = [
+    "gen_config_frames_by_coreconf",
+    "export_core_params_json",
+    "export_input_conf_json",
+    "export_output_conf_json",
+    "export_neuconf_json",
+    "export_core_plm_conf_json",
+    "export_used_L2_clusters",
+    "get_clk_en_L2_dict",
+]
+
+
+def gen_config_frames_by_coreconf(
+    config_dict: CorePlmConf,
+    write_to_file: bool,
+    fp: Path,
+    split_by_chip: bool,
+    formats: Sequence[str],
+) -> dict[ChipCoord, list[FrameArrayType]]:
+    """Generate configuration frames from the given `CorePlmConf`."""
+
+    def _write_to_f(name: str, array: FrameArrayType) -> None:
+        for format in formats:
+            _fp = (fp / name).with_suffix("." + format)  # don't forget "."
+            if format == "npy":
+                np2npy(_fp, array)
+            elif format == "bin":
+                np2bin(_fp, array)
+            else:
+                np2txt(_fp, array)
+
+    frame_arrays_total: dict[ChipCoord, list[FrameArrayType]] = defaultdict(list)
+
+    for chip_coord, conf_inchip in config_dict.items():
+        for core_coord, v in conf_inchip.items():
+            # 1. Only one config frame type I for each physical core.
+            config_frame_type1 = OfflineFrameGen.gen_config_frame1(
+                chip_coord, core_coord, _RID_UNSET, v.random_seed
+            )
+
+            # 2. Only one config frame type II for each physical core.
+            config_frame_type2 = OfflineFrameGen.gen_config_frame2(
+                chip_coord, core_coord, _RID_UNSET, v.params_reg
+            )
+
+            # 3. Iterate all the neuron segments inside the physical core.
+            # The meaning of 'n_neuron' in function 'gen_config_frame3' is the number of neurons in the NRAM.
+            config_frame_type3 = []
+            neu_conf_on_wram: list[NeuronConfig] = []
+
+            for neu_conf in v.neuron_configs.values():
+                if (
+                    neu_conf.neu_seg.offset + neu_conf.neu_seg.n_neuron
+                    <= HwConfig.ADDR_RAM_MAX + 1
+                ):
+                    # Place in the NRAM
+                    config_frame_type3.append(
+                        OfflineFrameGen.gen_config_frame3(
+                            chip_coord,
+                            core_coord,
+                            _RID_UNSET,
+                            neu_conf.neu_seg.offset,
+                            neu_conf.neu_seg.n_neuron,
+                            neu_conf.neuron_attrs,
+                            neu_conf.neuron_dest_info,
+                            neu_conf.neu_seg.repeat,
+                            # v.params_reg.n_repeat_nram,
+                            # XXX Is the parameter 'repeat' passed in from the previous step, or
+                            # is it calculated automatically in the parametric model?
+                            # Need to check this parameter?
+                        )
+                    )
+                else:
+                    # Only happens in ANN mode, where the repeat=1
+                    assert neu_conf.neu_seg.repeat == 1
+
+                    if (
+                        n_on_nram := HwConfig.ADDR_RAM_MAX + 1 - neu_conf.neu_seg.offset
+                    ) > 0:
+                        # Place in the NRAM partially & the rest in the WRAM
+                        neu_on_nram_conf = neu_conf[:n_on_nram]
+                        config_frame_type3.append(
+                            OfflineFrameGen.gen_config_frame3(
+                                chip_coord,
+                                core_coord,
+                                _RID_UNSET,
+                                neu_on_nram_conf.neu_seg.offset,
+                                neu_on_nram_conf.neu_seg.n_neuron,
+                                neu_on_nram_conf.neuron_attrs,
+                                neu_on_nram_conf.neuron_dest_info,
+                                neu_on_nram_conf.neu_seg.repeat,
+                            )
+                        )
+                        neu_conf_on_wram.append(neu_conf[n_on_nram:])
+                    else:
+                        # Place in the WRAM totally
+                        neu_conf_on_wram.append(neu_conf)
+
+            if config_frame_type3:
+                frame3 = np.concatenate(
+                    [f.value for f in config_frame_type3],
+                    dtype=FRAME_DTYPE,
+                    casting="no",
+                )
+            else:
+                frame3 = np.array([], dtype=FRAME_DTYPE)
+
+            _concat_frames = [
+                config_frame_type1.value,
+                config_frame_type2.value,
+                frame3,
+            ]
+            # 4. Only one config frame type IV for each physical core.
+            if v.params_reg.num_dendrite > 0:
+                # Weight part
+                config_frame_type4_w = OfflineFrameGen.gen_config_frame4(
+                    chip_coord,
+                    core_coord,
+                    _RID_UNSET,
+                    0,
+                    18 * v.weight_ram.shape[0],
+                    v.weight_ram,
+                )
+
+                _concat_frames.append(config_frame_type4_w.value)
+
+            if neu_conf_on_wram:
+                neu_on_wram = CorePlacement.neu_params_mapping(neu_conf_on_wram)
+                # Extra neurons part
+                assert v.weight_ram.shape[0] + neu_on_wram.shape[0] <= 512
+                config_frame_type4_n = OfflineFrameGen.gen_config_frame4(
+                    chip_coord,
+                    core_coord,
+                    _RID_UNSET,
+                    # Start after the weights mapped to the WRAM
+                    v.weight_ram.shape[0],
+                    18 * neu_on_wram.shape[0],
+                    neu_on_wram,
+                )
+
+                _concat_frames.append(config_frame_type4_n.value)
+
+            frame_arrays_total[chip_coord].append(
+                np.hstack(_concat_frames, casting="no")
+            )
+
+    if write_to_file:
+        if split_by_chip:
+            for chip, frame_arrays_onchip in frame_arrays_total.items():
+                f = np.hstack(frame_arrays_onchip, casting="no")
+                _write_to_f(f"config_chip{chip.address}_cores_all", f)
+        else:
+            _fa = []
+            for f in frame_arrays_total.values():
+                _fa.extend(f)
+            # Stack the frames of all chips (`_fa`), not only the last core's pieces.
+            f = np.hstack(_fa, casting="no")
+            _write_to_f("config_all", f)
+
+    return frame_arrays_total
+
+
+def _with_suffix_json(fp: Path, fname: str) -> Path:
+    return (fp / fname).with_suffix(".json")
+
+
+def export_core_params_json(core_conf: CoreConf, fp: Path) -> None:
+    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["core_conf_json"])
+    _valid_conf = {}
+
+    for chip_coord, cconf in core_conf.items():
+        _valid_conf[str(chip_coord)] = {}
+        for core_coord, conf in cconf.items():
+            _valid_conf[str(chip_coord)][str(core_coord)] = conf.to_json()
+
+    if _USE_ORJSON:
+        with open(_full_fp, "wb") as f:
+            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
+    else:
+        with open(_full_fp, "w") as f:
+            json.dump(_valid_conf, f, indent=2)
+
+
+def export_input_conf_json(input_conf_info: InputNodeConf, fp: Path) -> None:
+    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["input_conf_json"])
+    _valid_conf = {k: v.to_json() for k, v in input_conf_info.items()}
+
+    if _USE_ORJSON:
+        with open(_full_fp, "wb") as f:
+            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
+    else:
+        with open(_full_fp, "w") as f:
+            json.dump(_valid_conf, f, indent=2)
+
+
+def export_output_conf_json(output_conf_info: OutputDestConf, fp: Path) -> None:
+    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["output_conf_json"])
+    if _USE_ORJSON:
+        with open(_full_fp, "wb") as f:
+            f.write(
+                orjson.dumps(
+                    output_conf_info,
+                    default=PAIConfigJsonDefault,
+                    option=orjson.OPT_NON_STR_KEYS | orjson.OPT_INDENT_2,
+                )
+            )
+    else:
+        with open(_full_fp, "w") as f:
+            json.dump(output_conf_info, f, indent=2, cls=PAIConfigJsonEncoder)
+
+
+if _USE_ORJSON:
+
+    def export_neuconf_json(
+        neuron_conf: dict[Neuron, NeuronConfig], fp: Path, fname: str = "neu_conf"
+    ) -> None:
+        _full_fp = _with_suffix_json(fp, fname)
+        _valid_conf = {
+            k.name: orjson.loads(v.to_json()) for k, v in neuron_conf.items()
+        }
+
+        with open(_full_fp, "wb") as f:
+            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
+
+else:
+
+    def export_neuconf_json(
+        neuron_conf: dict[Neuron, NeuronConfig], fp: Path, fname: str = "neu_conf"
+    ) -> None:
+        _full_fp = _with_suffix_json(fp, fname)
+        _valid_conf = {k.name: json.loads(v.to_json()) for k, v in neuron_conf.items()}
+
+        with open(_full_fp, "w") as f:
+            json.dump(_valid_conf, f, indent=2)
+
+
+def export_core_plm_conf_json(
+    core_plm_conf: CorePlmConf, fp: Path, fname: str = "core_plm"
+) -> None:
+    _full_fp = _with_suffix_json(fp, fname)
+    _valid_conf = {}
+
+    for chip_coord, cconf in core_plm_conf.items():
+        _valid_conf[str(chip_coord)] = {}
+        for core_coord, conf in cconf.items():
+            _valid_conf[str(chip_coord)][str(core_coord)] = conf.to_json()
+
+    if _USE_ORJSON:
+        with open(_full_fp, "wb") as f:
+            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
+    else:
+        with open(_full_fp, "w") as f:
+            json.dump(_valid_conf, f, indent=2)
+
+
+def export_used_L2_clusters(
+    clk_en_L2_dict: dict[ChipCoord, list[int]], fp: Path, fname: str = "used_L2"
+) -> None:
+    _full_fp = _with_suffix_json(fp, fname)
+    _valid_conf = {str(k): v for k, v in clk_en_L2_dict.items()}
+
+    if _USE_ORJSON:
+        with open(_full_fp, "wb") as f:
+            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
+    else:
+        with open(_full_fp, "w") as f:
+            json.dump(_valid_conf, f, indent=2)
+
+
+def get_clk_en_L2_dict(
+    chip_list: list[ChipCoord], used_L2: list[list[RoutingCoord]]
+) -> dict[ChipCoord, list[int]]:
+    """Generate serial port data for controlling the L2 cluster clocks of the chip.
+
+    Args:
+        - chip_list: the available chip list.
+        - used_L2: the routing coordinates of used L2 clusters in each chip.
+
+    Returns:
+        A dictionary of chip address & the corresponding L2 cluster clocks enable uint8 data.
+
+    NOTE: Serial port data for L2 cluster clocks enable:
+        #1 [7:0] L2 clk en #0~#7 (x=0b000, y=0b000) ~ (x=0b000, y=0b111)
+        #2 [7:0] L2 clk en #8~#15(x=0b001, y=0b000) ~ (x=0b001, y=0b111)
+        ...
+        #8 [7:0] L2 clk en #56~#63(x=0b111, y=0b000) ~ (x=0b111, y=0b111)
+    """
+
+    def L2_to_idx(L2: RoutingCoord) -> int:
+        x = sum(L2[i].value[0] << (2 - i) for i in range(3))
+        y = sum(L2[i].value[1] << (2 - i) for i in range(3))
+
+        return (x << 3) + y
+
+    def to_clk_en_L2_u8(L2_inchip: list[RoutingCoord]) -> list[int]:
+        clk_en = []
+        # L2_inchip is out of order
+        bitmap = sum(1 << L2_to_idx(l2) for l2 in L2_inchip)
+
+        for _ in range(8):
+            u8 = bitmap & _mask(8)
+            bitmap >>= 8
+            clk_en.append(reverse_8bit(u8))
+
+        return clk_en
+
+    if sys.version_info >= (3, 10):
+        iterator = zip(chip_list, used_L2, strict=True)
+    else:
+        if len(chip_list) != len(used_L2):
+            raise ValueError(
+                "the length of chip list & used L2 clusters must be equal, "
+                f"but {len(chip_list)} != {len(used_L2)}."
+            )
+
+        iterator = zip(chip_list, used_L2)
+
+    clk_en_L2_dict = dict()
+    for chip_addr, used_L2_inchip in iterator:
+        clk_en_L2_dict[chip_addr] = to_clk_en_L2_u8(used_L2_inchip)
+
+    return clk_en_L2_dict
diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
index fded7c47..75d86171 100644
--- a/paibox/backend/conf_template.py
+++ b/paibox/backend/conf_template.py
@@ -1,9 +1,6 @@
 import sys
-from collections import defaultdict
-from collections.abc import Sequence
 from dataclasses import asdict, dataclass
 from enum import Enum
-from pathlib import Path
 from typing import Any, NamedTuple, TypedDict, Union
 
 import numpy as np
@@ -13,7 +10,6 @@
     ChipCoord,
     Coord,
     CoordAddr,
-    HwConfig,
     InputWidthFormat,
     MaxPoolingEnable,
     NeuronAttrs,
@@ -21,17 +17,7 @@
     NeuronDestInfo,
     ParamsReg,
 )
-from paicorelib import ReplicationId as RId
-from paicorelib import (
-    RoutingCoord,
-    SNNModeEnable,
-    SpikeWidthFormat,
-    WeightWidth,
-    get_replication_id,
-)
-from paicorelib.framelib import OfflineFrameGen
-from paicorelib.framelib import types as flib_types
-from paicorelib.framelib.utils import _mask, np2bin, np2npy, np2txt
+from paicorelib import SNNModeEnable, SpikeWidthFormat, WeightWidth, get_replication_id
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias
@@ -39,9 +25,6 @@
     from typing_extensions import TypeAlias
 
 from paibox.components import Neuron
-from paibox.utils import reverse_8bit
-
-from .context import _BACKEND_CONTEXT
 from .types import AxonCoord, NeuSegment, NodeName, WRAMPackedType
 
 try:
@@ -81,13 +64,15 @@ def default(self, o: Any) -> Any:
 
 
 # Prevent import errors caused by changes in type definitions in paicorelib.
-if hasattr(flib_types, "FRAME_DTYPE"):
-    FRAME_DTYPE = flib_types.FRAME_DTYPE
+from paicorelib import framelib
+
+if hasattr(framelib.types, "FRAME_DTYPE"):
+    FRAME_DTYPE = framelib.types.FRAME_DTYPE
 else:
     FRAME_DTYPE = np.uint64
 
-if hasattr(flib_types, "FrameArrayType"):
-    FrameArrayType = flib_types.FrameArrayType
+if hasattr(framelib.types, "FrameArrayType"):
+    FrameArrayType = framelib.types.FrameArrayType
 else:
     FrameArrayType = NDArray[FRAME_DTYPE]
 
@@ -176,58 +161,25 @@ class OutputNeuronDest(NamedTuple):
     end: AxonCoord
 
 
-class NeuronConfig(NamedTuple):
-    _extra_params = (
-        "n_neuron",
-        "addr_ram",
-        "addr_offset",
-    )
+@dataclass(frozen=True)
+class NeuronConfig:
     """Extra parameters for debugging."""
 
-    n_neuron: int
-    addr_ram: list[int]
-    """RAM Address of neurons"""
-    addr_offset: int
-    "RAM starting address(offset)"
-    neuron_attrs: NeuronAttrs
-    neuron_dest_info: NeuronDestInfo
-
-    @classmethod
-    def encapsulate(
-        cls,
-        neu_seg: NeuSegment,
-        axon_coords: list[AxonCoord],
-        dest_core_coords: list[Coord],
-        dest_chip_coord: Coord,
-    ):
-        """Build the `NeuronConfig`.
-
-        Args:
-            - neu_seg: neuron segment.
-            - axon_segs: the destination axon segments.
-            - dest_core_coords: coordinates of the core of the destination axons.
-            - dest_chip_coord: coordinate of the chip of the destination axons.
-        """
-        attrs = NeuronAttrs.model_validate(
-            neu_seg.target._slice_attrs(neu_seg.index), strict=True
-        )
-        dest_rid = get_replication_id(dest_core_coords)
-
-        dest_info = NeuronDest(
-            [coord.tick_relative for coord in axon_coords],
-            [coord.addr_axon for coord in axon_coords],
-            dest_core_coords[0].x,
-            dest_core_coords[0].y,
-            dest_rid.x,
-            dest_rid.y,
-            dest_chip_coord.x,
-            dest_chip_coord.y,
-        )
-
-        neuron_dest_info = NeuronDestInfo.model_validate(asdict(dest_info), strict=True)
-
-        return cls(
-            neu_seg.n_neuron, neu_seg.addr_ram, neu_seg.offset, attrs, neuron_dest_info
+    neu_seg: NeuSegment
+    """Neuron segment."""
+    axon_coords: list[AxonCoord]
+    """The destination axon segments."""
+    dest_core_coords: list[Coord]
+    """Coordinates of the core of the destination axons."""
+    dest_chip_coord: Coord
+    """Coordinate of the chip of the destination axons."""
+
+    def __getitem__(self, s: slice) -> "NeuronConfig":
+        return NeuronConfig(
+            self.neu_seg[s],
+            self.axon_coords[s],
+            self.dest_core_coords,
+            self.dest_chip_coord,
         )
 
     def export(self) -> NeuronConf:
@@ -235,7 +187,11 @@ def export(self) -> NeuronConf:
 
     def to_json(self) -> Union[str, bytes]:
         """Dump the configs into json for debugging."""
-        dict_ = {var: getattr(self, var) for var in self._extra_params}
+        dict_ = {
+            "n_neuron": self.neu_seg.n_neuron,
+            "addr_offset": self.neu_seg.offset,
+            "addr_ram": self.neu_seg.addr_ram,
+        }
         dict_ |= self.export().model_dump(by_alias=True)
 
         if _USE_ORJSON:
@@ -245,6 +201,25 @@ def to_json(self) -> Union[str, bytes]:
         else:
             return json.dumps(dict_, indent=2, cls=PAIConfigJsonEncoder)
 
+    @property
+    def neuron_attrs(self) -> NeuronAttrs:
+        return NeuronAttrs.model_validate(self.neu_seg.attrs, strict=True)
+
+    @property
+    def neuron_dest_info(self) -> NeuronDestInfo:
+        dest_rid = get_replication_id(self.dest_core_coords)
+        dest_info = NeuronDest(
+            [coord.tick_relative for coord in self.axon_coords],
+            [coord.addr_axon for coord in self.axon_coords],
+            self.dest_core_coords[0].x,
+            self.dest_core_coords[0].y,
+            dest_rid.x,
+            dest_rid.y,
+            self.dest_chip_coord.x,
+            self.dest_chip_coord.y,
+        )
+        return NeuronDestInfo.model_validate(asdict(dest_info), strict=True)
+
 
 class CorePlmConfig(NamedTuple):
     _extra_params = ()
@@ -320,293 +295,3 @@ class GraphInfo(TypedDict):
     """The occupied cores, including used & wasted."""
     misc: dict[str, Any]
     """Miscellaneous information."""
-
-
-_RID_UNSET = RId(0, 0)
-
-
-def gen_config_frames_by_coreconf(
-    config_dict: CorePlmConf,
-    write_to_file: bool,
-    fp: Path,
-    split_by_chip: bool,
-    formats: Sequence[str],
-) -> dict[ChipCoord, list[FrameArrayType]]:
-    """Generate configuration frames by given the `CorePlmConfig`."""
-
-    def _write_to_f(name: str, array: FrameArrayType) -> None:
-        for format in formats:
-            _fp = (fp / name).with_suffix("." + format)  # don't forget "."
-            if format == "npy":
-                np2npy(_fp, array)
-            elif format == "bin":
-                np2bin(_fp, array)
-            else:
-                np2txt(_fp, array)
-
-    frame_arrays_total: dict[ChipCoord, list[FrameArrayType]] = defaultdict(list)
-
-    for chip_coord, conf_inchip in config_dict.items():
-        for core_coord, v in conf_inchip.items():
-            # 1. Only one config frame type I for each physical core.
-            config_frame_type1 = OfflineFrameGen.gen_config_frame1(
-                chip_coord, core_coord, _RID_UNSET, v.random_seed
-            )
-
-            # 2. Only one config frame type II for each physical core.
-            config_frame_type2 = OfflineFrameGen.gen_config_frame2(
-                chip_coord, core_coord, _RID_UNSET, v.params_reg
-            )
-
-            # 3. Iterate all the neuron segments inside the physical core.
-            # FIXME Unfortunately, at present, only the corresponding NRAM can be written based on
-            # the neuron configurations, and it cannot handle the case where the NRAM address is >= 512,
-            # that is, some neurons need to occupy the NRAM, which is inconsistent with the current logic.
-            # Additional neuron configurations has been written to the NRAM within the CorePlacement.
-            # NOTE The meaning of 'n_neuron' in function 'gen_config_frame3' is the number of neurons in
-            # the NRAM. See notes of function '_weight_ram_mapping' of `CorePlacement` in file
-            # backend/placement.py for details.
-            config_frame_type3 = []
-            for neu_conf in v.neuron_configs.values():
-                # The actual number of neurons placed in NRAM.
-                _n_neuron_nram = (
-                    HwConfig.ADDR_RAM_MAX + 1
-                    if neu_conf.n_neuron > HwConfig.ADDR_RAM_MAX + 1
-                    else neu_conf.n_neuron
-                )
-
-                config_frame_type3.append(
-                    OfflineFrameGen.gen_config_frame3(
-                        chip_coord,
-                        core_coord,
-                        _RID_UNSET,
-                        neu_conf.addr_offset,
-                        _n_neuron_nram,
-                        neu_conf.neuron_attrs,
-                        neu_conf.neuron_dest_info,
-                        v.params_reg.n_repeat_nram,
-                    )
-                )
-
-            if config_frame_type3:
-                frame3 = np.concatenate(
-                    [f.value for f in config_frame_type3],
-                    dtype=FRAME_DTYPE,
-                    casting="no",
-                )
-            else:
-                frame3 = np.array([], dtype=FRAME_DTYPE)
-
-            # 4. Only one config frame type IV for each physical core.
-            # NOTE To avoid logical complications, write the entire weights to the WRAM, rather than just the
-            # valid partial weights, because there are still some neurons configurations in the WRAM.
-            if v.params_reg.num_dendrite > 0:
-                config_frame_type4 = OfflineFrameGen.gen_config_frame4(
-                    chip_coord,
-                    core_coord,
-                    _RID_UNSET,
-                    0,
-                    18 * (HwConfig.ADDR_RAM_MAX + 1),
-                    v.weight_ram[: HwConfig.ADDR_RAM_MAX + 1],
-                )
-            else:  # empty core placement
-                config_frame_type4 = None
-
-            if config_frame_type4:
-                frame_arrays_total[chip_coord].append(
-                    np.concatenate(
-                        [
-                            config_frame_type1.value,
-                            config_frame_type2.value,
-                            frame3,
-                            config_frame_type4.value,
-                        ],
-                        dtype=FRAME_DTYPE,
-                        casting="no",
-                    )
-                )
-            else:
-                frame_arrays_total[chip_coord].append(
-                    np.concatenate(
-                        [config_frame_type1.value, config_frame_type2.value, frame3],
-                        dtype=FRAME_DTYPE,
-                        casting="no",
-                    )
-                )
-
-    if write_to_file:
-        if split_by_chip:
-            for chip, frame_arrays_onchip in frame_arrays_total.items():
-                f = np.concatenate(frame_arrays_onchip, dtype=FRAME_DTYPE, casting="no")
-                _write_to_f(f"config_chip{chip.address}_cores_all", f)
-        else:
-            _fa = []
-            for f in frame_arrays_total.values():
-                _fa.extend(f)
-
-            f = np.concatenate(_fa, dtype=FRAME_DTYPE, casting="no")
-            _write_to_f("config_all", f)
-
-    return frame_arrays_total
-
-
-def _with_suffix_json(fp: Path, fname: str) -> Path:
-    return (fp / fname).with_suffix(".json")
-
-
-def export_core_params_json(core_conf: CoreConf, fp: Path) -> None:
-    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["core_conf_json"])
-    _valid_conf = {}
-
-    for chip_coord, cconf in core_conf.items():
-        _valid_conf[str(chip_coord)] = {}
-        for core_coord, conf in cconf.items():
-            _valid_conf[str(chip_coord)][str(core_coord)] = conf.to_json()
-
-    if _USE_ORJSON:
-        with open(_full_fp, "wb") as f:
-            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
-    else:
-        with open(_full_fp, "w") as f:
-            json.dump(_valid_conf, f, indent=2)
-
-
-def export_input_conf_json(input_conf_info: InputNodeConf, fp: Path) -> None:
-    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["input_conf_json"])
-    _valid_conf = {k: v.to_json() for k, v in input_conf_info.items()}
-
-    if _USE_ORJSON:
-        with open(_full_fp, "wb") as f:
-            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
-    else:
-        with open(_full_fp, "w") as f:
-            json.dump(_valid_conf, f, indent=2)
-
-
-def export_output_conf_json(output_conf_info: OutputDestConf, fp: Path) -> None:
-    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["output_conf_json"])
-    if _USE_ORJSON:
-        with open(_full_fp, "wb") as f:
-            f.write(
-                orjson.dumps(
-                    output_conf_info,
-                    default=PAIConfigJsonDefault,
-                    option=orjson.OPT_NON_STR_KEYS | orjson.OPT_INDENT_2,
-                )
-            )
-    else:
-        with open(_full_fp, "w") as f:
-            json.dump(output_conf_info, f, indent=2, cls=PAIConfigJsonEncoder)
-
-
-if _USE_ORJSON:
-
-    def export_neuconf_json(
-        neuron_conf: dict[Neuron, NeuronConfig], fp: Path, fname: str = "neu_conf"
-    ) -> None:
-        _full_fp = _with_suffix_json(fp, fname)
-        _valid_conf = {
-            k.name: orjson.loads(v.to_json()) for k, v in neuron_conf.items()
-        }
-
-        with open(_full_fp, "wb") as f:
-            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
-
-else:
-
-    def export_neuconf_json(
-        neuron_conf: dict[Neuron, NeuronConfig], fp: Path, fname: str = "neu_conf"
-    ) -> None:
-        _full_fp = _with_suffix_json(fp, fname)
-        _valid_conf = {k.name: json.loads(v.to_json()) for k, v in neuron_conf.items()}
-
-        with open(_full_fp, "w") as f:
-            json.dump(_valid_conf, f, indent=2)
-
-
-def export_core_plm_conf_json(
-    core_plm_conf: CorePlmConf, fp: Path, fname: str = "core_plm"
-) -> None:
-    _full_fp = _with_suffix_json(fp, fname)
-    _valid_conf = {}
-
-    for chip_coord, cconf in core_plm_conf.items():
-        _valid_conf[str(chip_coord)] = {}
-        for core_coord, conf in cconf.items():
-            _valid_conf[str(chip_coord)][str(core_coord)] = conf.to_json()
-
-    if _USE_ORJSON:
-        with open(_full_fp, "wb") as f:
-            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
-    else:
-        with open(_full_fp, "w") as f:
-            json.dump(_valid_conf, f, indent=2)
-
-
-def export_used_L2_clusters(
-    clk_en_L2_dict: dict[ChipCoord, list[int]], fp: Path, fname: str = "used_L2"
-) -> None:
-    _full_fp = _with_suffix_json(fp, fname)
-    _valid_conf = {str(k): v for k, v in clk_en_L2_dict.items()}
-
-    if _USE_ORJSON:
-        with open(_full_fp, "wb") as f:
-            f.write(orjson.dumps(_valid_conf, option=orjson.OPT_INDENT_2))
-    else:
-        with open(_full_fp, "w") as f:
-            json.dump(_valid_conf, f, indent=2)
-
-
-def _get_clk_en_L2_dict(
-    chip_list: list[ChipCoord], used_L2: list[list[RoutingCoord]]
-) -> dict[ChipCoord, list[int]]:
-    """Generate serial port data for controlling the L2 cluster clocks of the chip.
-
-    Args:
-        - chip_list: the available chip list.
-        - used_L2: the routing coordinates of used L2 clusters in each chip.
-
-    Returns:
-        A dictionary of chip address & the corresponding L2 cluster clocks enable uint8 data.
-
-    NOTE: Serial port data for L2 cluster clocks enable:
-        #1 [7:0] L2 clk en #0~#7 (x=0b000, y=0b000) ~ (x=0b000, y=0b111)
-        #2 [7:0] L2 clk en #8~#15(x=0b001, y=0b000) ~ (x=0b001, y=0b111)
-        ...
-        #8 [7:0] L2 clk en #8~#15(x=0b111, y=0b000) ~ (x=0b111, y=0b111)
-    """
-
-    def L2_to_idx(L2: RoutingCoord) -> int:
-        x = sum(L2[i].value[0] << (2 - i) for i in range(3))
-        y = sum(L2[i].value[1] << (2 - i) for i in range(3))
-
-        return (x << 3) + y
-
-    def to_clk_en_L2_u8(L2_inchip: list[RoutingCoord]) -> list[int]:
-        clk_en = []
-        # L2_inchip is out of order
-        bitmap = sum(1 << L2_to_idx(l2) for l2 in L2_inchip)
-
-        for _ in range(8):
-            u8 = bitmap & _mask(8)
-            bitmap >>= 8
-            clk_en.append(reverse_8bit(u8))
-
-        return clk_en
-
-    if sys.version_info >= (3, 10):
-        iterator = zip(chip_list, used_L2, strict=True)
-    else:
-        if len(chip_list) != len(used_L2):
-            raise ValueError(
-                "the length of chip list & used L2 clusters must be equal, "
-                f"but {len(chip_list)} != {len(used_L2)}."
-            )
-
-        iterator = zip(chip_list, used_L2)
-
-    clk_en_L2_dict = dict()
-    for chip_addr, used_L2_inchip in iterator:
-        clk_en_L2_dict[chip_addr] = to_clk_en_L2_u8(used_L2_inchip)
-
-    return clk_en_L2_dict
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 4f424b68..ec9f1a9d 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -19,13 +19,8 @@
     InputNeuronDest,
     InputNodeConf,
     OutputDestConf,
-    _get_clk_en_L2_dict,
-    export_core_params_json,
-    export_input_conf_json,
-    export_output_conf_json,
-    export_used_L2_clusters,
-    gen_config_frames_by_coreconf,
 )
+from .conf_exporting import *
 from .context import _BACKEND_CONTEXT, set_cflag
 from .graphs import (
     PAIGraph,
@@ -360,7 +355,7 @@ def config_export(self) -> GraphInfo:
             n_core_occupied=self.n_core_occupied,
             misc={
                 "name": self.graph.graph_name_repr,
-                "clk_en_L2": _get_clk_en_L2_dict(
+                "clk_en_L2": get_clk_en_L2_dict(
                     _BACKEND_CONTEXT["target_chip_addr"],
                     self.routing_tree.used_L2_clusters,
                 ),
@@ -637,7 +632,7 @@ def find_neuron(self, neuron: Neuron, *, verbose: int = 0) -> None:
                             print(
                                 f"{neuron.name} placed in {core_plm.coord}\n"
                                 f"N:        {neu_seg.n_neuron}\n"
-                                f"Address:  {neu_seg.addr_slice}"
+                                f"Address:  {neu_seg._addr_ram_repr}"
                             )
 
     def find_axon(self, neuron: Neuron, *, verbose: int = 0) -> None:
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 26d3124c..4d3dc71b 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -655,7 +655,7 @@ def export_param_config(self) -> CoreConfig:
         # fmt: off
         cb_config = CoreConfig(
             self.name,                          # name of the core
-            self.weight_width,             # weight_precision
+            self.weight_width,                  # weight_precision
             self.lcn_ex,                        # lcn_extension
             _mode_params[0],                    # input_width_format
             _mode_params[1],                    # spike_width_format
@@ -708,7 +708,7 @@ def export_neu_config(
             for ad in axon_dests:
                 dest_core_coords.extend(ad.core_coords)
 
-            config = NeuronConfig.encapsulate(
+            config = NeuronConfig(
                 neu_seg, axon_coords, dest_core_coords, axon_dests[0].chip_coord
             )
 
@@ -724,7 +724,7 @@ def export_neu_config(
                 for i in range(axon_addr_offset, axon_addr_offset + neu_seg.n_neuron)
             ]
 
-            config = NeuronConfig.encapsulate(
+            config = NeuronConfig(
                 neu_seg,
                 axon_coords,
                 [output_core_coord],
@@ -784,8 +784,7 @@ def twe(self) -> int:
 
     @property
     def n_working_dendrite(self) -> int:
-        """The number of actual working dendrites. IN ANN mode, the number of working   \
-            dendrites N <= 4096. In SNN mode, N <= 512.
+        """The number of actual working dendrites.
 
         NOTE: n_neuron * (2^comb_rate) = n_neuron << comb_rate
         """

From 51c9237d61cff38dac239a9a2f1fcdb07e8818fc Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 7 Oct 2024 11:11:39 +0800
Subject: [PATCH 096/187] =?UTF-8?q?=E2=9C=85=20rename=20&=20add=20test=20f?=
 =?UTF-8?q?or=20neurons=20mapping=20to=20the=20WRAM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...onf_template.py => test_conf_exporting.py} | 67 +++++++++++++++----
 tests/backend/test_mapper.py                  |  5 +-
 tests/backend/test_placement.py               |  2 +-
 3 files changed, 56 insertions(+), 18 deletions(-)
 rename tests/backend/{test_conf_template.py => test_conf_exporting.py} (78%)

diff --git a/tests/backend/test_conf_template.py b/tests/backend/test_conf_exporting.py
similarity index 78%
rename from tests/backend/test_conf_template.py
rename to tests/backend/test_conf_exporting.py
index 813836c1..bc33238c 100644
--- a/tests/backend/test_conf_template.py
+++ b/tests/backend/test_conf_exporting.py
@@ -11,16 +11,13 @@
     InputNeuronDest,
     NeuronConfig,
     NeuronDestInfo,
-    export_core_params_json,
-    export_core_plm_conf_json,
-    export_input_conf_json,
-    export_neuconf_json,
-    export_output_conf_json,
-    export_used_L2_clusters,
 )
+from paibox.backend.conf_exporting import *
 from paibox.backend.types import AxonCoord, NeuSegment
 from paicorelib.reg_model import TICK_WAIT_END_MAX, TICK_WAIT_START_MAX
 
+from .conftest import gen_random_used_lx
+
 try:
     import orjson as json
 except ModuleNotFoundError:
@@ -76,7 +73,7 @@ def _gen_random_neuron_config(n_per_channel: int, n_channel: int = 3) -> NeuronC
     dest_coords = [dest_coord_start, dest_coord_start + CoordOffset(0, 1)]
     pb.BACKEND_CONFIG.test_chip_addr = test_chip_addr
 
-    return NeuronConfig.encapsulate(
+    return NeuronConfig(
         nseg, axon_coords, dest_coords, pb.BACKEND_CONFIG.test_chip_addr
     )
 
@@ -194,10 +191,6 @@ def test_export_core_plm_conf_json(self, ensure_dump_dir, n_neuron):
             assert list(core_plm_conf_json.keys())[0] == str(chip_coord)
 
     def test_export_used_L2_clusters(self, ensure_dump_dir, monkeypatch):
-        from paibox.backend.conf_template import _get_clk_en_L2_dict
-
-        from .conftest import gen_random_used_lx
-
         clist = [Coord(0, 0), Coord(0, 1), Coord(2, 2)]
         monkeypatch.setattr(pb.BACKEND_CONFIG, "target_chip_addr", clist)
 
@@ -208,8 +201,54 @@ def test_export_used_L2_clusters(self, ensure_dump_dir, monkeypatch):
         for _ in range(len(clist)):
             used_L2.append(gen_random_used_lx(n, 2))
 
-        clk_en_L2_dict = _get_clk_en_L2_dict(
-            pb.BACKEND_CONFIG.target_chip_addr, used_L2
-        )
+        clk_en_L2_dict = get_clk_en_L2_dict(pb.BACKEND_CONFIG.target_chip_addr, used_L2)
 
         export_used_L2_clusters(clk_en_L2_dict, ensure_dump_dir)
+
+
+@pytest.mark.parametrize(
+    "index, offset, expected",
+    [
+        (slice(0, 200), 100, (slice(0, 200), None)),
+        (slice(200, 400), 512, (None, slice(200, 400))),
+        (slice(0, 600), 100, (slice(0, 412), slice(412, 600))),
+        (slice(100, 400), 300, (slice(100, 312), slice(312, 400))),
+    ],
+)
+def test_NeuronConfig_mapped_on_ram(index, offset, expected):
+    n = index.stop - index.start
+    neuron = pb.ANNNeuron((n,), bias=9, keep_shape=True)
+    dest_coord_start = Coord(random.randint(0, 10), random.randint(0, 10))
+
+    nseg = NeuSegment(neuron, index, offset)
+    axon_coords = [AxonCoord(0, i) for i in range(n)]
+    dest_coords = [dest_coord_start, dest_coord_start + CoordOffset(0, 1)]
+
+    neu_config1 = NeuronConfig(
+        nseg, axon_coords, dest_coords, pb.BACKEND_CONFIG.test_chip_addr
+    )
+
+    if (
+        neu_config1.neu_seg.offset + neu_config1.neu_seg.n_neuron
+        <= HwConfig.ADDR_RAM_MAX + 1
+    ):
+        result1 = neu_config1
+        result2 = None
+
+        assert result1.neu_seg.index == expected[0]
+        assert result2 == expected[1]
+
+    elif (n_on_nram := HwConfig.ADDR_RAM_MAX + 1 - neu_config1.neu_seg.offset) > 0:
+        s1 = slice(None, n_on_nram)
+        s2 = slice(n_on_nram, None)
+        result1 = neu_config1[s1]
+        result2 = neu_config1[s2]
+
+        assert result1.neu_seg.index == expected[0]
+        assert result2.neu_seg.index == expected[1]
+    else:
+        result1 = None
+        result2 = neu_config1
+
+        assert result1 == expected[0]
+        assert result2.neu_seg.index == expected[1]
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 73ec6dfe..ea0defce 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -6,7 +6,7 @@
 from paicorelib import WeightWidth as WW
 
 import paibox as pb
-from paibox.base import SynSys
+from paibox.backend.conf_exporting import *
 from paibox.exceptions import ResourceError
 
 from .conftest import TestData
@@ -344,11 +344,10 @@ def test_export_empty_cplm(self, build_example_net4_large_scale, ensure_dump_dir
 
 
 class TestMapper_Compile:
+    @pytest.mark.xfail(reason="change the hardware limit may cause unexpected errors.")
     def test_grouping_optim_latency(
         self, monkeypatch, build_Network_8bit_dense, ensure_dump_dir
     ):
-        from paibox.backend.conf_template import export_core_plm_conf_json
-
         monkeypatch.setattr(HwConfig, "N_NEURON_MAX_SNN", 8 * 8)
         monkeypatch.setattr(HwConfig, "N_FANIN_PER_DENDRITE_SNN", 6)
 
diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 679c2089..a9eec14a 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -22,7 +22,7 @@
 from paibox.exceptions import ResourceError
 from paibox.types import WEIGHT_DTYPE, WeightType
 
-from .test_conf_template import _gen_random_neuron_dest_info
+from .test_conf_exporting import _gen_random_neuron_dest_info
 
 
 def _packbits_ref(bits: np.ndarray, count: Optional[int] = None) -> int:

From a3abd6aca1c03e40efe880016cefe8ecce56c176 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 8 Oct 2024 16:33:36 +0800
Subject: [PATCH 097/187] =?UTF-8?q?=F0=9F=9A=9A=20rename=20`conf=5Ftemplat?=
 =?UTF-8?q?e.py`=20to=20`conf=5Ftypes.py`=20=20&=20update=20ref?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_exporting.py                 | 16 +++++++++++-----
 .../backend/{conf_template.py => conf_types.py}  |  0
 paibox/backend/mapper.py                         |  2 +-
 paibox/backend/placement.py                      |  2 +-
 paibox/backend/routing.py                        |  2 +-
 tests/backend/test_conf_exporting.py             |  2 +-
 6 files changed, 15 insertions(+), 9 deletions(-)
 rename paibox/backend/{conf_template.py => conf_types.py} (100%)

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
index 0f0adc40..ed2cfce0 100644
--- a/paibox/backend/conf_exporting.py
+++ b/paibox/backend/conf_exporting.py
@@ -12,7 +12,7 @@
 from paibox.components import Neuron
 from paibox.utils import reverse_8bit
 
-from .conf_template import (
+from .conf_types import (
     _USE_ORJSON,
     FRAME_DTYPE,
     CoreConf,
@@ -26,12 +26,14 @@
 from .types import _RID_UNSET
 
 if _USE_ORJSON:
-    from .conf_template import PAIConfigJsonDefault
     import orjson
+
+    from .conf_types import PAIConfigJsonDefault
 else:
-    from .conf_template import PAIConfigJsonEncoder
     import json
 
+    from .conf_types import PAIConfigJsonEncoder
+
 
 __all__ = [
     "gen_config_frames_by_coreconf",
@@ -112,7 +114,7 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                     if (
                         n_on_nram := HwConfig.ADDR_RAM_MAX + 1 - neu_conf.neu_seg.offset
                     ) > 0:
-                        # Place in the NRAM partially & the rest in the WRAM
+                        # Place in the NRAM partially
                         neu_on_nram_conf = neu_conf[:n_on_nram]
                         config_frame_type3.append(
                             OfflineFrameGen.gen_config_frame3(
@@ -126,6 +128,7 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                                 neu_on_nram_conf.neu_seg.repeat,
                             )
                         )
+                        # Place the rest in the WRAM
                         neu_conf_on_wram.append(neu_conf[n_on_nram:])
                     else:
                         # Place in the WRAM totally
@@ -162,7 +165,10 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
             if neu_conf_on_wram:
                 neu_on_wram = CorePlacement.neu_params_mapping(neu_conf_on_wram)
                 # Extra neurons part
-                assert v.weight_ram.shape[0] + neu_on_wram.shape[0] <= 512
+                assert (
+                    v.weight_ram.shape[0] + neu_on_wram.shape[0]
+                    <= HwConfig.ADDR_RAM_MAX + 1
+                )
                 config_frame_type4_n = OfflineFrameGen.gen_config_frame4(
                     chip_coord,
                     core_coord,
diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_types.py
similarity index 100%
rename from paibox/backend/conf_template.py
rename to paibox/backend/conf_types.py
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index ec9f1a9d..e685b358 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -11,7 +11,7 @@
 from paibox.exceptions import ConfigInvalidError, ResourceError
 from paibox.network import DynSysGroup
 
-from .conf_template import (
+from .conf_types import (
     CoreConf,
     CorePlmConf,
     FrameArrayType,
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 4d3dc71b..5f016f54 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -13,7 +13,7 @@
 from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
 
-from .conf_template import CoreConfig, CoreConfInChip, CorePlmConfig, NeuronConfig
+from .conf_types import CoreConfig, CoreConfInChip, CorePlmConfig, NeuronConfig
 from .context import _BACKEND_CONTEXT
 from .segment_utils import aligned_coords, get_axon_segments, get_neu_segments
 from .types import (
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index ac06ff23..b46e853f 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -12,7 +12,7 @@
 
 from paibox.exceptions import ResourceError, RoutingError
 
-from .conf_template import CorePlmConfInChip
+from .conf_types import CorePlmConfInChip
 from .placement import CoreBlock, CorePlacement, EmptyCorePlacement
 
 __all__ = ["RoutingGroup", "RoutingRoot"]
diff --git a/tests/backend/test_conf_exporting.py b/tests/backend/test_conf_exporting.py
index bc33238c..f120912c 100644
--- a/tests/backend/test_conf_exporting.py
+++ b/tests/backend/test_conf_exporting.py
@@ -5,7 +5,7 @@
 
 from paicorelib import Coord, CoordOffset, CoreMode, HwConfig, LCN_EX, MaxPoolingEnable
 from paicorelib import WeightWidth as WW
-from paibox.backend.conf_template import (
+from paibox.backend.conf_types import (
     CoreConfig,
     CorePlmConfig,
     InputNeuronDest,

From bce515fd692a256e36ca8d9a7f2aabc787f060ac Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 8 Oct 2024 16:34:52 +0800
Subject: [PATCH 098/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20slicing?=
 =?UTF-8?q?=20on=20slice=20objects?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_utils.py | 57 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index ef279954..e513ef60 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -37,3 +37,60 @@ def test_reverse_8bit(x, expected):
 )
 def test_reverse_16bit(x, expected):
     assert reverse_16bit(x) == expected
+
+
+@pytest.mark.parametrize(
+    "s1, idx, expected",
+    [
+        (slice(5, 10), 2, slice(7, 8)),
+        (slice(100, 200), 20, slice(120, 121)),
+        (slice(100, 200), -1, slice(199, 200)),
+        (slice(100, 200), -10, slice(190, 191)),
+    ],
+)
+def test_slice_by_index(s1, idx, expected):
+    n_s1 = s1.stop - s1.start
+    if idx < 0:
+        _idx = n_s1 + idx
+        if _idx < 0:
+            raise ValueError(f"index out of range: {idx} < 0")
+    else:
+        _idx = idx
+        if _idx > n_s1 - 1:
+            raise ValueError(f"index out of range: {idx} > {n_s1-1}")
+
+    start = s1.start + _idx
+    end = start + 1
+    new_slice = slice(start, end, s1.step)
+
+    assert new_slice == expected
+
+
+@pytest.mark.parametrize(
+    "s1, s2, expected",
+    [
+        (slice(5, 10), slice(0, 3), slice(5, 8)),
+        (slice(100, 200), slice(50, 100), slice(150, 200)),
+        (slice(100, 200), slice(None, 20), slice(100, 120)),
+        (slice(100, 200), slice(10, None), slice(110, 200)),
+        (slice(100, 300), slice(None, -40), slice(100, 260)),
+    ],
+)
+def test_slice_by_slice(s1, s2, expected):
+    n_s1 = s1.stop - s1.start
+    _s2_start = s2.start if s2.start is not None else 0
+    if s2.stop is None:
+        _s2_stop = n_s1
+    elif s2.stop < 0:
+        _s2_stop = n_s1 + s2.stop
+    else:
+        _s2_stop = s2.stop
+
+    if (_n_s2 := _s2_stop - _s2_start) > n_s1:
+        raise ValueError(f"index out of range: {_n_s2} > {n_s1}")
+
+    start = s1.start + _s2_start
+    end = s1.start + _s2_stop
+    new_slice = slice(start, end, s1.step)
+
+    assert new_slice == expected

From fc6d0a33ba354f3198323797b937b42b8d8848ea Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 10 Oct 2024 16:17:53 +0800
Subject: [PATCH 099/187] =?UTF-8?q?=F0=9F=90=9B=20fix=20exporting=20config?=
 =?UTF-8?q?=20to=20one=20file?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_exporting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
index ed2cfce0..ac478fb1 100644
--- a/paibox/backend/conf_exporting.py
+++ b/paibox/backend/conf_exporting.py
@@ -191,11 +191,11 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                 f = np.hstack(frame_arrays_onchip, casting="no")
                 _write_to_f(f"config_chip{chip.address}_cores_all", f)
         else:
-            _fa = []
+            _fa_list = []
             for f in frame_arrays_total.values():
-                _fa.extend(f)
+                _fa_list.extend(f)
 
-            f = np.hstack(_concat_frames, casting="no")
+            f = np.hstack(_fa_list, casting="no")
             _write_to_f("config_all", f)
 
     return frame_arrays_total

From 4ab5a2c5a8d977552ea9ad0ac72aba90eef6b9fc Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 15 Oct 2024 20:23:11 +0800
Subject: [PATCH 100/187] =?UTF-8?q?=F0=9F=9A=9A=20rename=20function=20`=5F?=
 =?UTF-8?q?nearest=5Fmultiple=5Fabove`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/routing.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index b46e853f..765c9daa 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -1,4 +1,5 @@
 import itertools
+import math
 from collections.abc import Generator, Iterator, Sequence
 from typing import Any, Optional, Union, final
 
@@ -570,15 +571,15 @@ def get_insert_location(
     ) -> tuple[int, int, list[Direction]]:
         """Look for the insertion location of the incoming routing group."""
         n_core_wasted = n_core_incoming - n_core_required
-        # Look for n_core_aligned closest to cur_cost, where n_core_aligned = n*n_core_incoming
-        n_core_aligned = _closet_multiple_above(self.n_core_total, n_core_incoming)
+        # Look for n_core_aligned nearest to cur_cost, where n_core_aligned = n*n_core_incoming
+        n_core_aligned = _nearest_multiple_above(self.n_core_total, n_core_incoming)
 
         n_core_predicted = n_core_aligned + n_core_incoming
         n_core_inchip = _num_inchip(n_core_predicted)
 
         # If online cores are hit, start from the next chip
         if n_core_inchip - n_core_wasted > HwConfig.N_CORE_OFFLINE:
-            n_core_aligned = _closet_multiple_above(
+            n_core_aligned = _nearest_multiple_above(
                 n_core_aligned, HwConfig.N_CORE_MAX_INCHIP
             )
 
@@ -765,9 +766,10 @@ def dfs_preorder(
     return dfs_preorder(tree, cluster)
 
 
-def _closet_multiple_above(above: int, multiple: int) -> int:
-    """Return the closest number above n that is a multiple of x."""
-    return (above + multiple - 1) // multiple * multiple
+def _nearest_multiple_above(a: int, x: int) -> int:
+    """Return the nearest number greater than or equal to `a`, and is an integer multiple of `x`."""
+    # ceil(a / x), i.e. the integer form (a + x - 1) // x for positive x
+    return math.ceil(a / x) * x
 
 
 def _num_inchip(n: int) -> int:

From ab9fdf3d3117d034d5dca3295c2e7f6de48586e6 Mon Sep 17 00:00:00 2001
From: birdswimming <birdswimming3.14@gmail.com>
Date: Thu, 10 Oct 2024 22:03:38 +0800
Subject: [PATCH 101/187] route refactor

---
 paibox/backend/graphs.py     | 105 +++++-------------
 paibox/backend/mapper.py     |  47 +++++---
 paibox/backend/placement.py  |  46 ++++++--
 paibox/backend/routing.py    | 204 ++++++++++++++++++++++++++++++-----
 paibox/backend/types.py      |  37 ++++++-
 tests/backend/conftest.py    |   2 +-
 tests/backend/test_graphs.py |  32 +++---
 7 files changed, 327 insertions(+), 146 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 5912838b..c2609326 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -189,7 +189,7 @@ def untwist_branch_nodes(self) -> None:
         self._update_graph()
 
     def topo_support_check(self) -> None:
-        _degree_check(self.degree_of_nodes, self.succ_dg)
+        # _degree_check(self.degree_of_nodes, self.succ_dg)
 
         # Only support output nodes with <= 1152 neurons so far.
         if any(
@@ -213,83 +213,34 @@ def build_check(self) -> None:
         if not self.has_built:
             raise GraphBuildError("the graph hasn't been built yet.")
 
-    def graph_partition(self) -> list[PartitionedEdges]:
-        """Partition the graph. According to specific rules, the nodes in the graph are divided,    \
-            and the edges connected to these partitioned nodes will be returned as a set.
-
-        Return: a list of partitioned edges & a list of routing groups id.
-
-        TODO constraints in partitioning: iw, sw, snn_en, tws, twe, pool_max_en.
-        """
-        self.build_check()
-
-        gh_parts: list[PartitionedEdges] = []
-        rgid = 0  # routing group id
-        seen_nodes: set[NodeName] = set()
-
+    def graph_partition(self) -> list[RouteGroup]:
+        groups: list[SuccGroup] = list()
         for node in self.ordered_nodes:
-            if node in seen_nodes:
-                continue
-
-            if self.degree_of_nodes[node].out_degree == 0:
-                seen_nodes.add(node)
-                continue
-
-            succ_nodes: set[NodeName] = set()
-            # Other source nodes involved
-            other_involved_nodes: set[NodeName] = set()
-            # Successor candidate nodes
-            succ_nodes_candid: set[NodeName] = set(self.succ_dg[node].keys())
-            # Partitioned nodes
-            partitioned_nodes = set([node])
-
-            while len(succ_nodes_candid) > 0:
-                succ_nodes.update(succ_nodes_candid)
-
-                for candid in succ_nodes_candid:
-                    if self.degree_of_nodes[candid].in_degree > 1:
-                        coming_nodes = set(self.pred_dg[candid].keys()) - seen_nodes
-                        other_involved_nodes |= coming_nodes
-
-                other_involved_nodes -= partitioned_nodes
-                partitioned_nodes |= other_involved_nodes
-                succ_nodes_candid.clear()
-
-                for other_node in other_involved_nodes:
-                    other_candid = set(self.succ_dg[other_node].keys()) - succ_nodes
-                    succ_nodes_candid |= other_candid
-
-            seen_nodes |= partitioned_nodes
-
-            succ_edges_set: set[EdgeType] = set()
-            succ_nodes_set: set[NodeType] = set()
-
-            for _node in partitioned_nodes:
-                succ_edges_set.update(e.edge for e in self.succ_dg[_node].values())
-                succ_nodes_set.update(self._raw_nodes[n] for n in self.succ_dg[_node])
-
-            succ_nodes_lst: list[NodeType] = list(succ_nodes_set)
-            mode = succ_nodes_lst[0].mode
-            if any(mode != node.mode for node in succ_nodes_lst):
-                raise NotSupportedError("mixed mode is not supported.")
-
-            idx_of_sg = GraphNodeConstrs.tick_wait_attr_constr(succ_nodes_lst)
-
-            if len(idx_of_sg) > 0:
-                for idx in idx_of_sg:
-                    succ_edges_sg: set[EdgeType] = set()
-                    for i in idx:
-                        succ_edges_sg.update(
-                            e.edge
-                            for e in self.pred_dg[succ_nodes_lst[i].name].values()
-                        )
-                    gh_parts.append(PartitionedEdges(succ_edges_sg, rgid, rt_mode=mode))
-            else:
-                gh_parts.append(PartitionedEdges(succ_edges_set, rgid, rt_mode=mode))
-
-            rgid += 1
-
-        return gh_parts
+            succ_node_names = set(self.succ_dg[node].keys())
+            if len(succ_node_names) > 0:
+                succ_nodes = [self._raw_nodes[n] for n in succ_node_names]
+                succ_edges = [self.succ_dg[node][n.name].edge for n in succ_nodes]
+                groups.append(SuccGroup(succ_nodes, succ_edges, self._raw_nodes[node]))
+        
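+        # Union-find-like pass: merge all groups with each other; two groups are merged if they intersect, and the merged result is represented by a RouteGroup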
+        route_groups: list[RouteGroup] = list()
+        visited = set()
+        def dfs(group: SuccGroup, visited: set[SuccGroup], route_group: RouteGroup):
+            for other_group in groups:
+                if other_group not in visited and not set(group.nodes).isdisjoint(other_group.nodes):
+                    visited.add(other_group)
+                    route_group.add_group(other_group)
+                    dfs(other_group, visited, route_group)
+
+        for group in groups:
+            if group not in visited:
+                route_group = RouteGroup()
+                route_group.add_group(group)
+                visited.add(group)
+                dfs(group, visited, route_group)
+                route_groups.append(route_group)
+        
+        return route_groups
 
     def multicast_optim(
         self,
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index e685b358..13e54966 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -71,6 +71,10 @@ def clear(self) -> None:
         self.succ_core_blocks.clear()
         self.input_core_blocks.clear()
 
+        self.degrees_of_cb.clear()
+        self.routing_groups.clear()
+        self.succ_routing_groups.clear()
+
         self.core_params.clear()
         self.core_plm_config.clear()
 
@@ -178,11 +182,6 @@ def compile(
         """Group the axons of core block."""
         self.cb_axon_grouping()
 
-        # Convert core blocks to routing groups
-        self.routing_groups, self.succ_routing_groups = convert2routing_groups(
-            self.succ_core_blocks, self.degrees_of_cb, self.input_core_blocks
-        )
-
         """Core coordinate assignment."""
         self.coord_assign(core_estimate_only)
 
@@ -208,14 +207,14 @@ def untwist_branch_nodes(self) -> None:
 
     def build_core_blocks(self) -> None:
         """Build core blocks based on partitioned edges."""
-        partitioned_edges = self.graph.graph_partition()
-
-        for part in partitioned_edges:
-            self.core_blocks.append(
-                CoreBlock.build(
-                    *part.edges, routing_id=part.rg_id, rt_mode=part.rt_mode
-                )
-            )
+        route_groups = self.graph.graph_partition()
+        
+        for route_group in route_groups:
+            route_group.dump()
+            self.routing_groups.append(RoutingGroup(route_group))
+        
+        for rg in self.routing_groups:
+            self.core_blocks.extend(rg.core_blocks)
 
         for cur_cb in self.core_blocks:
             succ_cbs = []
@@ -237,6 +236,21 @@ def build_core_blocks(self) -> None:
 
         self.degrees_of_cb = get_node_degrees(self.succ_core_blocks)
 
+        for rg in self.routing_groups:
+            self.succ_routing_groups[rg] = []
+            rg_succ_cb: set[CoreBlock] = set()
+            for cb in rg:
+                rg_succ_cb.update(self.succ_core_blocks[cb])
+
+            for _rg in self.routing_groups:
+                if _rg == rg:
+                    continue
+                for cb in rg_succ_cb:
+                    if cb in _rg:
+                        self.succ_routing_groups[rg].append(_rg)
+                        break
+        
+
     def lcn_ex_adjustment(self) -> None:
         """Adjust the LCN of each core block & set target LCN."""
         # In the absence of the above complex situations, the following judgment is useless.
@@ -267,8 +281,8 @@ def lcn_ex_adjustment(self) -> None:
 
     def cb_axon_grouping(self) -> None:
         """The axons are grouped after the LCN has been modified & locked."""
-        for cb in self.core_blocks:
-            cb.group_axons()
+        for rg in self.routing_groups:
+            rg.group_axons()
 
     def graph_optimization(self) -> None:
         optimized = self.graph.graph_optimization(self.core_blocks, self.routing_groups)
@@ -291,6 +305,9 @@ def coord_assign(self, core_estimate_only: bool) -> None:
                 optim_target=_BACKEND_CONTEXT.cflags["grouping_optim_target"]
             )
 
+        for rg in self.routing_groups:
+            rg.sub_routing_group.set_config()
+            rg.sub_routing_group.dump()
         # Optimize the order of routing groups
         # self.routing_groups = reorder_routing_groups(self.succ_routing_groups)
         self.routing_groups = toposort(self.succ_routing_groups)
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 5f016f54..f6ed26e8 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -9,12 +9,13 @@
 from paicorelib.framelib import OfflineFrameGen
 
 from paibox.components import FullConnectedSyn, Neuron
-from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning
+from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning, NotSupportedError
 from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
 
 from .conf_types import CoreConfig, CoreConfInChip, CorePlmConfig, NeuronConfig
 from .context import _BACKEND_CONTEXT
+from .constrs import GraphNodeConstrs
 from .segment_utils import aligned_coords, get_axon_segments, get_neu_segments
 from .types import (
     _COORD_UNSET,
@@ -33,6 +34,8 @@
     WRAMPackedType,
     WRAMUnpackedType,
     is_iw8,
+    RouteGroup,
+    EdgeType,
 )
 
 
@@ -91,6 +94,7 @@ def __init__(
         self.core_placements = dict()
         self.axon_segments = dict()
         self.neuron_segs_of_cb = []
+        self.ordered_axons: list[SourceNodeType] = []
 
     def group_neurons(
         self, optim_target: Literal["latency", "core", "both"] = "both"
@@ -143,6 +147,11 @@ def _n_axon2lcn_ex(self) -> LCN_EX:
 
         return LCN_EX(lcn)
 
+    def assign(self, allocated: list[Coord], chip_coord: Coord) -> list[Coord]:
+        self.core_coords = allocated
+        self.chip_coord = chip_coord
+        return allocated, []
+
     def copy(self):
         raise NotImplementedError
 
@@ -276,12 +285,17 @@ def n_neuron_of_plm(self) -> list[int]:
             for neuron_segs in self.neuron_segs_of_cb
         ]
 
-    def group_axons(self) -> None:
+    def group_axons(self, multicast_axons: list[SourceNodeType] = list()) -> None:
         if not self._lcn_locked:
             raise GraphBuildError("get axon segments after 'lcn_ex' is locked.")
-
+        # Remove shared axons
+        axons = [ax for ax in self.axons if ax not in multicast_axons]
+        # More axons may be added to the axon list
+        axons = multicast_axons + axons
+        self.ordered_axons = axons
+        print(f"origin: {len(self.axons)}, ordered: {len(self.ordered_axons)}")
         self.axon_segments = get_axon_segments(
-            self.axons, self.n_timeslot, self.n_fanin_base
+            self.ordered_axons, self.n_timeslot, self.n_fanin_base
         )
 
     @cached_property
@@ -294,7 +308,7 @@ def raw_weight_of_dest(self) -> list[WeightType]:
             # The weights for each destination node.
             w_of_dest = []
 
-            for s in self.source:
+            for s in self.ordered_axons:
                 if syn := self._get_syn_of(s, d):
                     w_of_dest.append(syn.connectivity)
                 else:
@@ -371,6 +385,26 @@ def build(
 
         return cls(*synapses, routing_id=routing_id, mode=rt_mode, seed=seed)
 
+    @classmethod
+    def build_core_blocks(cls, route_group: RouteGroup) -> list["CoreBlock"]:
+        core_blocks:list[CoreBlock] = []
+        succ_nodes = list(route_group.nodes)
+        mode = succ_nodes[0].mode
+        if any (node.mode != mode for node in succ_nodes):
+            raise NotSupportedError("mixed mode is not supported.")
+        idx_of_sg = GraphNodeConstrs.tick_wait_attr_constr(succ_nodes)
+        route_group.set_inputs()
+        if len(idx_of_sg) == 0:
+            idx_of_sg = [list(range(len(succ_nodes)))]
+            
+        for idx in idx_of_sg:
+            succ_edges: set[EdgeType] = set()
+            for i in idx:
+                succ_edges.update(route_group.inputs[succ_nodes[i]])
+            core_block = CoreBlock.build(*succ_edges, routing_id = 0, rt_mode = mode)
+            core_blocks.append(core_block)
+        return core_blocks
+
     @classmethod
     def export_core_plm_config(cls, cb: "CoreBlock") -> CoreConfInChip:
         """Export the parameters of the core into a dictionary."""
@@ -792,7 +826,7 @@ def n_working_dendrite(self) -> int:
 
     @property
     def source(self) -> list[SourceNodeType]:
-        return self.parent.source
+        return self.parent.ordered_axons
 
     @property
     def dest(self) -> list[DestNodeType]:
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 765c9daa..ed7aa9cf 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -14,10 +14,23 @@
 from paibox.exceptions import ResourceError, RoutingError
 
 from .conf_types import CorePlmConfInChip
+from paibox.exceptions import ResourceError, RoutingError, GraphBuildError
 from .placement import CoreBlock, CorePlacement, EmptyCorePlacement
-
+from .types import *
 __all__ = ["RoutingGroup", "RoutingRoot"]
 
+def Coord2RoutingCoord(coord: Coord) -> RoutingCoord:
+    directions: list[Direction] = []
+    x = coord.x
+    y = coord.y
+
+    for i in range(MAX_ROUTING_PATH_LENGTH):
+        # Each iteration shifts right by 4-i so the current bit (highest first) lands in the lowest position, yielding value_x and value_y
+        shift = 4 - i
+        value_x = (x >> shift) & 0b1  # value of the current bit
+        value_y = (y >> shift) & 0b1
+        directions.append(Direction((value_x, value_y)))
+    return RoutingCoord(*directions)    
 
 class RoutingCluster:
     def __init__(
@@ -458,6 +471,128 @@ def routing_coord(self) -> RoutingCoord:
 
         return RoutingCoord(*reversed(path))
 
+# each sub routing group should be able to route by single coord
+class SubRoutingGroup:
+    index = 0
+    def __init__(self, unorder_elements: list["CoreBlock|SubRoutingGroup"], ordered_elements: list["SubRoutingGroup"]) -> None:
+        self.unorder_elements:list["CoreBlock|SubRoutingGroup"] = unorder_elements
+        self.ordered_elements:list["SubRoutingGroup"] = ordered_elements
+        self.routing_elements:list["CoreBlock|SubRoutingGroup"] = unorder_elements + ordered_elements
+        self.offset:list[int] = list()
+        self.n_core_required:int = 0
+        self.tail_wasted:int = 0
+        self.name = f"SubRoutingGroup[{SubRoutingGroup.index}]"
+        axons:set[SourceNodeType] = set()
+        for element in self.routing_elements:
+            axons.update(element.axons)
+        self.axons:list[SourceNodeType] = list(axons)
+        SubRoutingGroup.index += 1
+    
+    def set_config(self):
+        for element in self.routing_elements:
+            if isinstance(element, SubRoutingGroup):
+                element.set_config()
+
+        # unorder elements sorted from big to small, avoiding assigning waste.
+        unorder_elements = sorted(self.unorder_elements, key=lambda x: x.n_core_required, reverse=True)
+        ordered_elements = self.ordered_elements
+        for element in unorder_elements:
+            n_core_required = element.n_core_required
+            self.offset.append(self.n_core_required)
+            self.n_core_required += n_core_required
+        
+        # ordered elements should be assigned first
+        for element in ordered_elements:
+            n_core_required = element.n_core_required
+            n_core_assigned = _nearest_multiple_above(self.n_core_required, n_core_required)
+            self.offset.append(n_core_assigned)
+            self.n_core_required = n_core_assigned + n_core_required
+        
+        #routing elements should satisfy topological order
+        self.routing_elements:list["CoreBlock|SubRoutingGroup"] = unorder_elements + ordered_elements
+        
+        
+        sub_tail_wasted = 0 if isinstance(self.routing_elements[-1], CoreBlock) else self.routing_elements[-1].tail_wasted
+        assigned_n_core_required = 1 << (self.n_core_required - 1).bit_length()
+        self.tail_wasted += assigned_n_core_required - self.n_core_required + sub_tail_wasted
+        self.n_core_required = assigned_n_core_required
+        
+        
+    # return Coord that wasted in subrouting group
+    def assign(self, allocated: list[Coord], chip_coord: Coord) -> tuple[list[Coord], list[Coord]]:
+        cur_i = 0
+        assigned_coords:list[Coord] = []
+        wasted_coords:list[Coord] = []
+        for element, offset in zip(self.routing_elements, self.offset):
+            if offset > cur_i:
+                wasted_coords = wasted_coords + allocated[cur_i : offset]
+            cur_i = offset
+            
+            n = element.n_core_required
+            print(f"element: {element.name}, {n} cores, start at {Coord2RoutingCoord(allocated[cur_i])}")
+            assigned, wasted = element.assign(allocated[cur_i : cur_i + n], chip_coord)
+            assigned_coords = assigned_coords + assigned
+            wasted_coords = wasted_coords + wasted
+            cur_i += n
+        return assigned_coords, wasted_coords + allocated[cur_i:]
+    
+    # use list to keep the order of axons
+    def group_axons(self, multicast_axons: list[SourceNodeType]) -> None:
+        private_multicast_axons = multicast_axons.copy()
+        axons_count:list[int] = [0] * len(self.axons)
+        for element in self.routing_elements:
+            for axon in element.axons:
+                idx = self.axons.index(axon)
+                axons_count[idx] += 1
+        for i, axon in enumerate(self.axons):
+            if axons_count[i] > 1 and axon not in private_multicast_axons:
+                private_multicast_axons.append(axon)
+        
+        for element in self.routing_elements:
+            element.group_axons(private_multicast_axons)
+        
+    @property
+    def core_blocks(self) -> list[CoreBlock]:
+        cbs = []
+        for element in self.routing_elements:
+            if isinstance(element, CoreBlock):
+                cbs.append(element)
+            else:
+                cbs += element.core_blocks
+        return cbs
+    
+    @classmethod
+    def build(cls, route_group: RouteGroup) -> "SubRoutingGroup":
+        
+        if len(route_group.nodes) == 0:
+            return None
+        sub_group = RouteGroup()
+        remaining_group = RouteGroup()
+        for group in route_group.groups:
+            if group.input in route_group.nodes:
+                sub_group.add_group(group)
+            else:
+                remaining_group.add_group(group)
+                
+        remaining_group.nodes = remaining_group.nodes - sub_group.nodes
+        unorder_elements:list[CoreBlock] = CoreBlock.build_core_blocks(remaining_group)
+        ordered_elements:list[SubRoutingGroup] = []
+        sub_routing_group: SubRoutingGroup = SubRoutingGroup.build(sub_group)
+        if sub_routing_group is not None:
+            ordered_elements = [sub_routing_group]
+        return cls(unorder_elements, ordered_elements)
+    
+    def dump(self, i:int = 0):
+        tabs = "\t" * i
+        print(f"{tabs}SubRoutingGroup: {self.name} with {self.n_core_required} cores:")
+        for element in self.routing_elements:
+            if isinstance(element, SubRoutingGroup):
+                element.dump(i+1)
+            else:
+                print(f"{tabs}\t{element.name} with {element.n_core_required} cores:")
+                for edge in element._parents:
+                    print(f"{tabs}\t\t{edge.name}: {edge.source.name} -> {edge.target.name}")
+                
 
 class RoutingGroup:
     """Core blocks located within a routing group are routable.
@@ -465,29 +600,25 @@ class RoutingGroup:
     NOTE: Axon groups within a routing group are the same.
     """
 
-    def __init__(self, *cb: CoreBlock) -> None:
-        self.core_blocks = list(cb)
+    def __init__(self, route_group:RouteGroup) -> None:
+        self.sub_routing_group: SubRoutingGroup = SubRoutingGroup.build(route_group)
+        self.core_blocks = self.sub_routing_group.core_blocks
         self.assigned_coords: list[Coord] = []
         """Assigned core coordinates in the routing group"""
         self.wasted_coords: list[Coord] = []
         """Wasted core coordinates in routing group"""
         self.wasted_core_plm: dict[Coord, EmptyCorePlacement] = {}
         """Wasted core placements"""
+        self.sub_n_core_wasted = 0
 
     def assign(
-        self, assigned: list[Coord], wasted: list[Coord], chip_coord: Coord
+        self, allocated: list[Coord], chip_coord: Coord
     ) -> None:
+        print(f"route_group: {self.sub_routing_group.name} assigned from {Coord2RoutingCoord(allocated[0])}")
+        assigned, wasted = self.sub_routing_group.assign(allocated, chip_coord)
         self.assigned_coords = assigned
         self.wasted_coords = wasted
 
-        # Assign the coordinates to each core block inside the routing group.
-        cur_i = 0
-        for cb in self:
-            n = cb.n_core_required
-            cb.core_coords = assigned[cur_i : cur_i + n]
-            cb.chip_coord = chip_coord
-            cur_i += n
-
     def core_block_alloc(self) -> None:
         for cb in self:
             cb.core_plm_alloc()
@@ -511,6 +642,14 @@ def n_core_required(self) -> int:
         """The actual number of cores required by the routing group."""
         return sum(cb.n_core_required for cb in self)
 
+    @property
+    def n_core_cost(self) -> int:
+        return self.sub_routing_group.n_core_required
+    
+    @property
+    def tail_wasted(self) -> int:
+        return self.sub_routing_group.tail_wasted
+
     @property
     def routing_cost(self) -> RoutingCost:
         return get_routing_consumption(self.n_core_required)
@@ -537,6 +676,13 @@ def __getitem__(self, idx: int) -> CoreBlock:
     def __iter__(self) -> Iterator[CoreBlock]:
         return self.core_blocks.__iter__()
 
+    def group_axons(self) -> None:
+        for cb in self.core_blocks:
+            if not cb._lcn_locked:
+                raise GraphBuildError("get axon segments after 'lcn_ex' is locked.")
+        self.sub_routing_group.group_axons([])
+        
+        
 
 @final
 class RoutingRoot:
@@ -567,11 +713,9 @@ def get_leaf_coord(
         raise RoutingError(f"get leaf {leaf.tag} coordinate failed.")
 
     def get_insert_location(
-        self, n_core_incoming: int, n_core_required: int
+        self, n_core_incoming: int, n_core_wasted: int
     ) -> tuple[int, int, list[Direction]]:
         """Look for the insertion location of the incoming routing group."""
-        n_core_wasted = n_core_incoming - n_core_required
-        # Look for n_core_aligned nearest to cur_cost, where n_core_aligned = n*n_core_incoming
         n_core_aligned = _nearest_multiple_above(self.n_core_total, n_core_incoming)
 
         n_core_predicted = n_core_aligned + n_core_incoming
@@ -609,20 +753,25 @@ def place_routing_group(self, routing_group: RoutingGroup) -> None:
         """Place a routing group in the chip list. Assign each core blocks with routing coordinates &   \
             make sure they are routable.
         """
-        n_core_req = routing_group.n_core_required
-        n_core_cost = 1 << (n_core_req - 1).bit_length()  # n_core_req <= 2^X
-
-        if n_core_req > HwConfig.N_CORE_OFFLINE:
+        print(f"Routing Group:")
+        for cb in routing_group:
+            print(f"\t{cb.name}")
+        
+        n_core_cost = routing_group.n_core_cost
+        tail_wasted = routing_group.tail_wasted
+        n_core_req = n_core_cost - tail_wasted
+        print(f"\tcost: {n_core_cost}, tail_wasted: {tail_wasted}")
+
+        if  n_core_req > HwConfig.N_CORE_OFFLINE:
             raise ResourceError(
                 "the number of cores required by the routing group exceeds the hardware limit, "
-                f"{n_core_cost} > {HwConfig.N_CORE_OFFLINE}."
+                f"{n_core_req} > {HwConfig.N_CORE_OFFLINE}."
             )
 
         core_insert_loc, chip_idx_loc, rpath_start = self.get_insert_location(
-            n_core_cost, n_core_req
+            n_core_cost, tail_wasted
         )
-        valid_coords = []
-        wasted_coords = []
+        allocated_coords:list[Coord] = []
 
         for i, rpath in _routing_path_generator(n_core_cost, rpath_start):
             leaf_coord = RoutingCoord(*reversed(rpath))
@@ -630,13 +779,10 @@ def place_routing_group(self, routing_group: RoutingGroup) -> None:
             if (core_insert_loc + i) % (HwConfig.N_SUB_ROUTING_NODE**Level.L2) == 0:
                 L2_coord = RoutingCoord(*reversed(rpath[Level.L2 :]))
                 self.used_L2_clusters[chip_idx_loc].append(L2_coord)
+            allocated_coords.append(leaf_coord.to_coord())
 
-            if i < n_core_req:
-                valid_coords.append(leaf_coord.to_coord())
-            else:
-                wasted_coords.append(leaf_coord.to_coord())
-
-        routing_group.assign(valid_coords, wasted_coords, self.chip_list[chip_idx_loc])
+        routing_group.assign(allocated_coords, self.chip_list[chip_idx_loc])
+        print()
 
     def insert_routing_group(self, routing_group: RoutingGroup) -> bool:
         """Insert a `RoutingGroup` in the routing tree. Assign each core blocks with \
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 3ca72075..a398a49f 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -36,6 +36,8 @@
     "AxonCoord",
     "AxonSegment",
     "CoreAbstract",
+    "SuccGroup",
+    "RouteGroup",
 ]
 
 NodeName: TypeAlias = str
@@ -103,7 +105,40 @@ class PartitionedEdges(NamedTuple):
 
 NeuSlice: TypeAlias = slice
 
-
+class SuccGroup:
+    # edge for input to nodes[i] is edges[i]
+    def __init__(self, nodes:list[NodeType], edges:list[EdgeType], input:NodeType):
+        self.nodes = nodes
+        self.edges = edges
+        self.input = input
+
+class RouteGroup:
+    def __init__(self):
+        self.groups: list[SuccGroup] = list()
+        self.nodes: set[NodeType] = set()
+        self.inputs: dict[NodeType, list[EdgeType]] = dict()
+        
+    def add_group(self, group:SuccGroup):
+        self.groups.append(group)
+        self.nodes.update(group.nodes)
+    
+    def set_inputs(self):
+        for group in self.groups:
+            for node, edge in zip(group.nodes, group.edges):
+                if node not in self.inputs.keys():
+                    self.inputs[node] = list()
+                assert edge.dest.name == node.name
+                self.inputs[node].append(edge)
+    
+    def dump(self):
+        print("RouteGroup:")
+        for group in self.groups:
+            print(f"\tGroup: of {group.input.name}")
+            for node, edge in zip(group.nodes, group.edges):
+                print(f"\t\tnode: {node.name}, edge: {edge.name}: {edge.source.name} -> {edge.dest.name}")
+        print("\tNodes:")
+        for node in self.nodes:
+            print(f"\t\tnode: {node.name}")
 @dataclass(frozen=True)
 class NeuSegment:
     target: DestNodeType
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index d6bfcb30..c4bf6f54 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -668,7 +668,7 @@ def __init__(self):
         self.n3 = pb.IF((800,), 10, name="n3", tick_wait_start=2)
         self.n4 = pb.IF((1000,), 10, name="n4", tick_wait_start=3)
         self.n5 = pb.IF((800,), 10, name="n5", tick_wait_start=4)
-        self.n6 = pb.IF((1000,), 10, name="n6", tick_wait_start=4)
+        self.n6 = pb.IF((200,), 10, name="n6", tick_wait_start=4)
 
         self.s1 = pb.FullConn(self.inp1, self.n1, name="s1")
         self.s2 = pb.FullConn(self.n1, self.n2, name="s2")
diff --git a/tests/backend/test_graphs.py b/tests/backend/test_graphs.py
index dc030b73..fb1fddd5 100644
--- a/tests/backend/test_graphs.py
+++ b/tests/backend/test_graphs.py
@@ -259,19 +259,17 @@ def test_untwist_branch_nodes1(
         mapper = pb.Mapper()
         mapper.build(net)
 
-        try:
-            mapper.compile(no_twisted_branch=no_twisted_branch)
-        except NotSupportedError:
-            # A certain sturcture in the network is not supported.
-            assert no_twisted_branch == False
-            return
+        mapper.compile(no_twisted_branch=no_twisted_branch)
 
         mapper.export(fp=ensure_dump_dir)
-
-        assert (
-            len(mapper.graph.nodes)
-            == len(net.nodes(level=1).include(Neuron, pb.InputProj)) + net.n_copy
-        )
+        
+        if no_twisted_branch:
+            assert (
+                len(mapper.graph.nodes)
+                == len(net.nodes(level=1).include(Neuron, pb.InputProj)) + net.n_copy
+            )
+        else:
+            assert len(mapper.graph.nodes) == len(net.nodes(level=1).include(Neuron, pb.InputProj))
 
 
 class TestGroupEdges:
@@ -457,12 +455,12 @@ def test_group_edges_with_constrs(
         mapper = pb.Mapper()
         mapper.clear()
         mapper.build(net)
-        partitioned_edges = mapper.graph.graph_partition()
+        mapper.compile(no_twisted_branch=False)
 
         # In this case, N2 & N3 should be together.
         pos_n2 = pos_n3 = 0
-        for i, part in enumerate(partitioned_edges):
-            _g_with_name = [e.name for e in part.edges]
+        for i, cb in enumerate(mapper.core_blocks):
+            _g_with_name = [e.name for e in cb._parents]
             if "s2" in _g_with_name:
                 pos_n2 = i
             if "s3" in _g_with_name:
@@ -477,11 +475,11 @@ def test_group_edges_with_constrs(
 
         mapper.clear()
         mapper.build(net)
-        partitioned_edges = mapper.graph.graph_partition()
+        mapper.compile(no_twisted_branch=False)
 
         pos_n2 = pos_n3 = 0
-        for i, part in enumerate(partitioned_edges):
-            _g_with_name = [e.name for e in part.edges]
+        for i, part in enumerate(mapper.core_blocks):
+            _g_with_name = [e.name for e in part._parents]
             if "s2" in _g_with_name:
                 pos_n2 = i
             if "s3" in _g_with_name:

From 149032f78ab415a0b008ebe0b53eb333d61433c3 Mon Sep 17 00:00:00 2001
From: birdswimming <birdswimming3.14@gmail.com>
Date: Wed, 16 Oct 2024 14:56:21 +0800
Subject: [PATCH 102/187] modify typing

---
 paibox/backend/routing.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index ed7aa9cf..08a2bc0a 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -474,10 +474,10 @@ def routing_coord(self) -> RoutingCoord:
 # each sub routing group should be able to route by single coord
 class SubRoutingGroup:
     index = 0
-    def __init__(self, unorder_elements: list["CoreBlock|SubRoutingGroup"], ordered_elements: list["SubRoutingGroup"]) -> None:
-        self.unorder_elements:list["CoreBlock|SubRoutingGroup"] = unorder_elements
+    def __init__(self, unorder_elements: list[Union[CoreBlock,"SubRoutingGroup"]], ordered_elements: list["SubRoutingGroup"]) -> None:
+        self.unorder_elements:list[Union[CoreBlock,"SubRoutingGroup"]] = unorder_elements
         self.ordered_elements:list["SubRoutingGroup"] = ordered_elements
-        self.routing_elements:list["CoreBlock|SubRoutingGroup"] = unorder_elements + ordered_elements
+        self.routing_elements:list[Union[CoreBlock,"SubRoutingGroup"]] = unorder_elements + ordered_elements
         self.offset:list[int] = list()
         self.n_core_required:int = 0
         self.tail_wasted:int = 0
@@ -509,7 +509,7 @@ def set_config(self):
             self.n_core_required = n_core_assigned + n_core_required
         
         #routing elements should satisfy topological order
-        self.routing_elements:list["CoreBlock|SubRoutingGroup"] = unorder_elements + ordered_elements
+        self.routing_elements:list[Union[CoreBlock,"SubRoutingGroup"]] = unorder_elements + ordered_elements
         
         
         sub_tail_wasted = 0 if isinstance(self.routing_elements[-1], CoreBlock) else self.routing_elements[-1].tail_wasted

From 70507553e6bda755166e8b697770df8622329e7d Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 23 Oct 2024 11:30:32 +0800
Subject: [PATCH 103/187] =?UTF-8?q?=F0=9F=A7=B0=20chore:=20fix=20pre-commi?=
 =?UTF-8?q?t=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/pytest-ci.yml | 2 +-
 .pre-commit-config.yaml         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pytest-ci.yml b/.github/workflows/pytest-ci.yml
index a5bfbb4b..533bcf76 100644
--- a/.github/workflows/pytest-ci.yml
+++ b/.github/workflows/pytest-ci.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, windows-latest]
     runs-on: ${{ matrix.os }}
 
     steps:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6794b316..126df612 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,20 +10,20 @@ repos:
     rev: 5.13.2
     hooks:
       - id: isort
-        stages: [Nonepre-commitNone]
+        stages: [pre-commit]
 
   - repo: https://github.com/psf/black
     rev: 24.8.0
     hooks:
       - id: black
-        stages: [Nonepre-commitNone]
+        stages: [pre-commit]
 
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         types_or: [markdown, yaml, json]
-        stages: [Nonepre-commitNone]
+        stages: [pre-commit]
 
   - repo: https://github.com/dannysepler/rm_unneeded_f_str
     rev: v0.2.0

From e03493e7c942d174669c0572715b8df10e13e09f Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 23 Oct 2024 11:34:54 +0800
Subject: [PATCH 104/187] =?UTF-8?q?=E2=9C=85=20add=20onboard=20test=20case?=
 =?UTF-8?q?s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/onboard/.gitignore      |    2 +
 tests/onboard/README.md       |  273 ++++++
 tests/onboard/test_onboard.py | 1647 +++++++++++++++++++++++++++++++++
 3 files changed, 1922 insertions(+)
 create mode 100644 tests/onboard/.gitignore
 create mode 100644 tests/onboard/README.md
 create mode 100644 tests/onboard/test_onboard.py

diff --git a/tests/onboard/.gitignore b/tests/onboard/.gitignore
new file mode 100644
index 00000000..9530332c
--- /dev/null
+++ b/tests/onboard/.gitignore
@@ -0,0 +1,2 @@
+config
+data
\ No newline at end of file
diff --git a/tests/onboard/README.md b/tests/onboard/README.md
new file mode 100644
index 00000000..fbbfb8fa
--- /dev/null
+++ b/tests/onboard/README.md
@@ -0,0 +1,273 @@
+# OnBoard Tests
+
+## ANN权重映射
+
+### 无神经元在WRAM
+
+#### 001 单层W=8E=1
+
+测试设置：
+
+- 一层Linear，8bit权重，扇入扩展E=1，Linear尺寸(144, 400)
+- 设置Linear `tws=1`，`twe=2`
+- 运行5时间步，监测L1输出
+
+检查：
+
+1. 在输出数据中，`ts=1,2` 时刻有数据，`ts>2` 时刻无数据（0）
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 002 单层W=8E=4
+
+测试设置：
+
+- 一层Linear，8bit权重，扇入扩展E=4，Linear尺寸(500, 100)
+- 设置Linear `tws=1`，`twe=0`
+- 运行5时间步，监测L1输出
+
+检查：
+
+1. 在输出数据中，每个时刻均有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 003 单层W=2E=2
+
+测试设置：
+
+- 一层Linear，2bit权重，扇入扩展E=2，Linear尺寸(240, 200)
+- 设置Linear `bias=99`，`tws=1`，`twe=0`
+- 运行5时间步，监测L1输出
+
+检查：
+
+1. 在输出数据中，每个时刻均有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 004 单层W=4E=2
+
+测试设置：
+
+- 一层Linear，4bit权重，扇入扩展E=2，Linear尺寸(240, 500)
+- 设置Linear `tws=1`，`twe=0`
+- 运行5时间步，监测L1输出
+
+检查：
+
+1. 在输出数据中，每个时刻均有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 005 多层-1
+
+测试设置：
+
+- 两层Linear，均为8bit权重，扇入扩展E1=4，L1尺寸(200,200)；E2=4，L2尺寸(200,10)
+- L2层 `bias=2`，`bit_trunc=9`
+- 设置L1 `tws=1`，`twe=0`；L2 `tws=2`，`twe=0`
+- 运行5时间步，监测L2输出
+
+检查：
+
+1. 在输出数据中，从第2时刻开始才有有效数据。注意每个时间步L2的输入是上一个时刻的L1输出
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 006 多层-2
+
+测试设置：
+
+- 两层Linear，均为1bit权重，扇入扩展E1=2，L1尺寸(240,100)；E2=1，L2尺寸(100,10)
+- 设置L1 `tws=1`，`twe=0`；L2 `tws=2`，`twe=0`
+- 运行5时间步，监测L2输出
+
+检查：
+
+1. 在输出数据中，从第2时刻开始才有有效数据。注意每个时间步L2的输入是上一个时刻的L1输出
+2. 有效输出数据应与参考输出在每个时间步相等
+
+### 有神经元在WRAM
+
+#### 007 单层W=4E=1 *芯片不输出
+
+测试设置：
+
+- 一层Linear，4bit权重，扇入扩展E=1，Linear尺寸(120, 800)，此时可容纳876
+- 设置Linear `tws=1`，`twe=0`
+- 运行5时间步，监测L1输出
+
+检查：
+
+1. 在输出数据中，每个时刻均有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 008 多层-1 *芯片不输出
+
+测试设置：
+
+- 两层Linear，第一层4bit权重，扇入扩展E=1，Linear尺寸(120, 600)，此时可容纳876。第二层8bit权重，扇入扩展E=8，Linear尺寸(600, 10)
+- 设置L1 `tws=1`，`twe=0`；L2 `tws=2`，`twe=0`
+- 运行5时间步，监测L2输出
+
+检查：
+
+1. 在输出数据中，从第2时刻开始才有有效数据。注意每个时间步L2的输入是上一个时刻的L1输出
+2. 有效输出数据应与参考输出在每个时间步相等
+
+## SNN算子
+
+### Conv1d
+
+#### 001 Conv1d
+
+测试设置：
+
+- 仅一层IF，conv1d突触连接，输入特征图尺寸(8, 100)，卷积核尺寸(4, 8, 8)，s=1, p=0，输出特征图尺寸(4, 93)
+- 设置IF正阈值10，`tws=1`，`twe=0`
+- 运行5时间步，监测IF输出
+
+检查：
+
+1. 在输出数据中，每个时刻均有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等
+
+## 半折叠算子
+
+### 单层
+
+#### 001 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 输入尺寸：(1, 64, 64)
+- 卷积核尺寸：(4, 1, 7, 7)
+- stride：1
+- padding：0
+- 输出尺寸：(4, 58, 58)
+- 运行时间步：65，监测输出
+
+检查：
+
+1. 在输出数据中，每个时刻均有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等
+
+#### 002 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 输入尺寸：(8, 64, 64)
+- 卷积核尺寸：(4, 8, 7, 7)
+- stride：2
+- padding：0
+- 输出尺寸：(4, 29, 29)
+- 运行时间步：65，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+#### 003 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 输入尺寸：(8, 64, 64)
+- 卷积核尺寸：(4, 8, 3, 3)
+- stride：1
+- padding：1
+- 输出尺寸：(4, 64, 64)
+- 运行时间步：65，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+#### 004 MaxPool2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d最大池化
+- 输入尺寸：(3, 32, 32)
+- 池化核尺寸：(2, 2)
+- stride：2
+- padding：0
+- 输出尺寸：(3, 16, 16)
+- 运行时间步：32，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+#### 005 AvgPool2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d平均池化
+- 输入尺寸：(3, 32, 32)
+- 池化核尺寸：(2, 2)
+- stride：2
+- padding：0
+- 输出尺寸：(3, 16, 16)
+- 运行时间步：32，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+### 多层
+
+#### 006 Conv2dSemiFoldedNet
+
+测试设置：
+
+- 三层：半折叠2d卷积 + 半折叠2d卷积 + 半折叠全连接
+- 输入尺寸：(3, 32, 32)
+- 第一层卷积核尺寸：(4, 3, 3, 3)，stride：1，padding：1
+- 第二层卷积核尺寸：(4, 4, 3, 3)，stride：1，padding：1
+- 第三层全连接尺寸：(4\*32\*32, 10)
+- 输出尺寸：(10,)
+- 运行时间步：40，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+#### 007 Conv2dSemiFoldedNet
+
+测试设置：
+
+- 三层：半折叠2d卷积 + 半折叠2d卷积 + 半折叠全连接
+- 输入尺寸：(3, 32, 32)
+- 第一层卷积核尺寸：(4, 3, 4, 4)，stride：2，padding：1
+- 第二层卷积核尺寸：(4, 4, 4, 4)，stride：2，padding：1
+- 第三层全连接尺寸：(4\*8\*8, 10)
+- 输出尺寸：(10,)
+- 运行时间步：40，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+#### 008 CNNSemiFoldedNet
+
+测试设置：
+
+- 五层：半折叠2d卷积 + 半折叠2d最大池化 + 半折叠2d卷积 + 半折叠2d最大池化 + 半折叠全连接
+- 输入尺寸：(3, 32, 32)
+- 第一层卷积核尺寸：(4, 3, 3, 3)，stride：1，padding：1
+- 第二层池化核尺寸：(2, 2)，stride：2，padding：0
+- 第三层卷积核尺寸：(4, 4, 3, 3)，stride：1，padding：1
+- 第四层池化核尺寸：(2, 2)，stride：2，padding：0
+- 第五层全连接尺寸：(4\*8\*8, 10)
+- 输出尺寸：(10,)
+- 运行时间步：42，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
diff --git a/tests/onboard/test_onboard.py b/tests/onboard/test_onboard.py
new file mode 100644
index 00000000..89e1eb8d
--- /dev/null
+++ b/tests/onboard/test_onboard.py
@@ -0,0 +1,1647 @@
+import paibox as pb
+from paibox.components.neuron.base import MetaNeuron
+from paibox.types import NEUOUT_U8_DTYPE, VOLTAGE_DTYPE, NeuOutType, VoltageType
+import pytest
+import numpy as np
+from pathlib import Path
+
+from tests.components.utils import conv1d_golden
+
+TEST_DIR = Path(__file__).parent
+DATA_DIR = TEST_DIR / "data"  # each test keeps its data.npz in a sub-directory here
+CONFIG_DIR = TEST_DIR / "config"  # each test exports its mapper output below this path
+DATA_DIR.mkdir(parents=True, exist_ok=True)  # NOTE: directories are created at import time
+CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+
+FIXED_RNG = np.random.default_rng(seed=42)  # NOTE(review): one generator shared by all tests, so freshly generated data depends on which tests ran before
+
+
+def _out_bypass1(t, data1, *args, **kwargs):  # input function handed to pb.InputProj; `t` and extras are ignored
+    return data1  # pass `data1` through unchanged
+
+
+def _ann_bit_trunc(v_array: VoltageType, bit_trunc: int = 8) -> NeuOutType:  # reference output used by the checks below
+    return np.where(v_array <= 0, 0, MetaNeuron._truncate(v_array, bit_trunc)).astype(
+        NEUOUT_U8_DTYPE  # non-positive voltages become 0; the rest come from MetaNeuron._truncate
+    )
+
+
+class TestOnBoard_WRAMMapping:
+    def test_001(self):
+        """Linear 144->400 with tick_wait_end=2: output at ts=1, 2 and zeros afterwards."""
+        class Net001(pb.Network):
+            def __init__(self, w):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape[0],))
+                # Start at ts=1 and work for 2 timesteps (idle from ts=1+2=3)
+                self.l1 = pb.Linear(
+                    self.i1, shape[1], w, tick_wait_start=1, tick_wait_end=2
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+
+        TEST_NAME = self.test_001.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+        shape = (144, 400)
+
+        sim_time = 5
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 10, size=shape, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 5, size=(sim_time, shape[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape[1]), dtype=np.uint8)
+
+        network = Net001(weight1)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            if i < 2:
+                # At ts = 1 & 2, there is output data
+                ref = _ann_bit_trunc(
+                    inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE),
+                    bit_trunc=network.l1.bit_trunc,
+                )
+                assert np.array_equal(sim.data[network.p1][i], ref)
+            else:
+                # At ts > 2, linear is not working, no output data
+                ref = np.zeros_like(sim.data[network.p1][i])
+
+            assert np.array_equal(sim.data[network.p1][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p1][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_002(self):
+        """Linear 500->100 active from ts=1 onwards; every timestep is checked."""
+        class Net002(pb.Network):
+            def __init__(self, w):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape[1], w, tick_wait_start=1, tick_wait_end=0
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+
+        TEST_NAME = self.test_002.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+        shape = (500, 100)
+
+        sim_time = 5
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 10, size=shape, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 5, size=(sim_time, shape[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape[1]), dtype=np.uint8)
+
+        network = Net002(weight1)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            ref = _ann_bit_trunc(
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE),
+                bit_trunc=network.l1.bit_trunc,
+            )
+            assert np.array_equal(sim.data[network.p1][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p1][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_003(self):
+        """Linear 240->200 with bias=99, compiled with weight bit optimization enabled."""
+        class Net003(pb.Network):
+            def __init__(self, w):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape[1], w, bias=99, tick_wait_start=1, tick_wait_end=0
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+
+        TEST_NAME = self.test_003.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+        shape = (240, 200)
+
+        sim_time = 5
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=2, enable weight bit optimization
+            weight1 = FIXED_RNG.integers(-2, 2, size=shape, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 2, size=(sim_time, shape[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape[1]), dtype=np.uint8)
+
+        network = Net003(weight1)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            ref = _ann_bit_trunc(
+                # Use bias in linear
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE)
+                + network.l1.bias,
+                bit_trunc=network.l1.bit_trunc,
+            )
+            assert np.array_equal(sim.data[network.p1][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p1][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=True)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_004(self):
+        """Linear 240->500 with weights in [-8, 8), compiled with weight bit optimization."""
+        class Net004(pb.Network):
+            def __init__(self, w):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape[1], w, tick_wait_start=1, tick_wait_end=0
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+
+        TEST_NAME = self.test_004.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+        shape = (240, 500)
+
+        sim_time = 5
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=4, enable weight bit optimization
+            weight1 = FIXED_RNG.integers(-8, 8, size=shape, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 5, size=(sim_time, shape[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape[1]), dtype=np.uint8)
+
+        network = Net004(weight1)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            ref = _ann_bit_trunc(
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE),
+                bit_trunc=network.l1.bit_trunc,
+            )
+            assert np.array_equal(sim.data[network.p1][i], ref)
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p1][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=True)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_005(self):
+        """Two chained Linear layers (200->200->10); the second uses bias=2, bit_trunc=9."""
+        class Net005(pb.Network):
+            def __init__(self, w1, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape1[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape1[1], w1, tick_wait_start=1, tick_wait_end=0
+                )
+                # Start at ts=2, no end
+                self.l2 = pb.Linear(
+                    self.l1,
+                    shape2[1],
+                    w2,
+                    bias=2,
+                    bit_trunc=9,
+                    tick_wait_start=2,
+                    tick_wait_end=0,
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+                self.p2 = pb.Probe(self.l2, "feature_map")
+
+        TEST_NAME = self.test_005.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (200, 200)
+        shape2 = (shape1[1], 10)
+
+        sim_time = 5
+
+        # Prefer a previously saved data file; otherwise fall back to fresh data
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                weight2 = npz["weight2"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-5, 5, size=shape1, dtype=np.int8)
+            # W=4  NOTE(review): [-15, 15) exceeds the signed 4-bit range [-8, 7] - confirm label
+            weight2 = FIXED_RNG.integers(-15, 15, size=shape2, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 5, size=(sim_time, shape1[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape2[1]), dtype=np.uint8)
+
+        network = Net005(weight1, weight2)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            # Use bias in linear1
+            _l1 = _ann_bit_trunc(
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE)
+                + network.l1.bias,
+                bit_trunc=network.l1.bit_trunc,
+            )
+            # The intermediate result is correct
+            assert np.array_equal(sim.data[network.p1][i], _l1)
+
+            if i > 0:
+                # The input of Linear2 is the output of Linear1 at the previous timestep
+                ref = _ann_bit_trunc(
+                    sim.data[network.p1][i - 1] @ weight2.astype(VOLTAGE_DTYPE)
+                    + network.l2.bias,
+                    bit_trunc=network.l2.bit_trunc,
+                )
+                # At ts >= 2, Linear2 is outputting
+                assert np.array_equal(sim.data[network.p2][i], ref)
+            else:
+                # At ts = 1, Linear2 is not working, no output data
+                ref = np.zeros_like(sim.data[network.p2][i])
+
+            assert np.array_equal(sim.data[network.p2][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p2][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE,
+                weight1=weight1,
+                weight2=weight2,
+                inpdata1=inpdata1,
+                refresult1=refresult1,
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_006(self):
+        """Two chained Linear layers (240->100->10) with 0/1 weights; exported in bin format."""
+        class Net006(pb.Network):
+            def __init__(self, w1, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape1[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape1[1], w1, tick_wait_start=1, tick_wait_end=0
+                )
+                # Start at ts=2, no end
+                self.l2 = pb.Linear(
+                    self.l1, shape2[1], w2, tick_wait_start=2, tick_wait_end=0
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+                self.p2 = pb.Probe(self.l2, "feature_map")
+
+        TEST_NAME = self.test_006.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (240, 100)
+        shape2 = (shape1[1], 10)
+
+        sim_time = 5
+
+        # Prefer a previously saved data file; otherwise fall back to fresh data
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                weight2 = npz["weight2"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=1, enable weight bit optimization
+            weight1 = FIXED_RNG.integers(
+                0, 1, size=shape1, dtype=np.int8, endpoint=True
+            )
+            # W=1
+            weight2 = FIXED_RNG.integers(
+                0, 1, size=shape2, dtype=np.int8, endpoint=True
+            )
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 5, size=(sim_time, shape1[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape2[1]), dtype=np.uint8)
+
+        network = Net006(weight1, weight2)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            _l1 = _ann_bit_trunc(
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE),
+                bit_trunc=network.l1.bit_trunc,
+            )
+            # The intermediate result is correct
+            assert np.array_equal(sim.data[network.p1][i], _l1)
+
+            if i > 0:
+                # The input of Linear2 is the output of Linear1 at the previous timestep
+                ref = _ann_bit_trunc(
+                    sim.data[network.p1][i - 1] @ weight2.astype(VOLTAGE_DTYPE),
+                    bit_trunc=network.l2.bit_trunc,
+                )
+                # At ts >= 2, Linear2 is outputting
+                assert np.array_equal(sim.data[network.p2][i], ref)
+            else:
+                # At ts = 1, Linear2 is not working, no output data
+                ref = np.zeros_like(sim.data[network.p2][i])
+
+            assert np.array_equal(sim.data[network.p2][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p2][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE,
+                weight1=weight1,
+                weight2=weight2,
+                inpdata1=inpdata1,
+                refresult1=refresult1,
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=True)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="bin", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_007(self):
+        """Linear 120->800 with weights in [-8, 8), compiled with weight bit optimization."""
+        class Net007(pb.Network):
+            def __init__(self, w1):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape1[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape1[1], w1, tick_wait_start=1, tick_wait_end=0
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+
+        TEST_NAME = self.test_007.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (120, 800)
+
+        sim_time = 5
+
+        # Prefer a previously saved data file; otherwise fall back to fresh data
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=4, enable weight bit optimization
+            weight1 = FIXED_RNG.integers(-8, 8, size=shape1, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 3, size=(sim_time, shape1[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape1[1]), dtype=np.uint8)
+
+        network = Net007(weight1)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            ref = _ann_bit_trunc(
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE),
+                bit_trunc=network.l1.bit_trunc,
+            )
+
+            assert np.array_equal(sim.data[network.p1][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p1][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=True)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_008(self):
+        """Two chained Linear layers (120->600->10); config exported in bin format."""
+        class Net008(pb.Network):
+            def __init__(self, w1, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=(shape1[0],))
+                # Start at ts=1, no end
+                self.l1 = pb.Linear(
+                    self.i1, shape1[1], w1, tick_wait_start=1, tick_wait_end=0
+                )
+                # Start at ts=2, no end
+                self.l2 = pb.Linear(
+                    self.l1, shape2[1], w2, tick_wait_start=2, tick_wait_end=0
+                )
+                self.p1 = pb.Probe(self.l1, "feature_map")
+                self.p2 = pb.Probe(self.l2, "feature_map")
+
+        TEST_NAME = self.test_008.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (120, 600)
+        shape2 = (shape1[1], 10)
+
+        sim_time = 5
+
+        # Prefer a previously saved data file; otherwise fall back to fresh data
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                weight2 = npz["weight2"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=4, enable weight bit optimization
+            weight1 = FIXED_RNG.integers(-8, 8, size=shape1, dtype=np.int8)
+            # W=8
+            weight2 = FIXED_RNG.integers(-15, 15, size=shape2, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                np.iinfo(np.uint8).min, 5, size=(sim_time, shape1[0]), dtype=np.uint8
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, shape2[1]), dtype=np.uint8)
+
+        network = Net008(weight1, weight2)
+        sim = pb.Simulator(network, start_time_zero=False)
+
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+        # Check
+        for i in range(sim_time):
+            _l1 = _ann_bit_trunc(
+                inpdata1[i, :].ravel() @ weight1.astype(VOLTAGE_DTYPE),
+                bit_trunc=network.l1.bit_trunc,
+            )
+            # The intermediate result is correct
+            assert np.array_equal(sim.data[network.p1][i], _l1)
+
+            if i > 0:
+                # The input of Linear2 is the output of Linear1 at the previous timestep
+                ref = _ann_bit_trunc(
+                    sim.data[network.p1][i - 1] @ weight2.astype(VOLTAGE_DTYPE),
+                    bit_trunc=network.l2.bit_trunc,
+                )
+                # At ts >= 2, Linear2 is outputting
+                assert np.array_equal(sim.data[network.p2][i], ref)
+            else:
+                # At ts = 1, Linear2 is not working, no output data
+                ref = np.zeros_like(sim.data[network.p2][i])
+
+            assert np.array_equal(sim.data[network.p2][i], ref)
+
+            if USE_EXISTING_DATA:
+                assert np.array_equal(ref, refresult1[i, :])
+            else:
+                refresult1[i, :] = sim.data[network.p2][i]
+
+            print(f"t={i + 1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE,
+                weight1=weight1,
+                weight2=weight2,
+                inpdata1=inpdata1,
+                refresult1=refresult1,
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=True)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="bin", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+
+class TestOnBoard_SpikingOp:
+    def test_Conv1d_001(self):
+        """Conv1d feeding an IF neuron: records the simulated output; golden check is a TODO."""
+        class Net001(pb.Network):
+            def __init__(self, w1):
+                super().__init__()
+                self.i1 = pb.InputProj(_out_bypass1, shape_out=shape1)
+                # Start at ts=1, no end
+                self.n1 = pb.IF(out_shape, 10, tick_wait_start=1, tick_wait_end=0)
+                self.conv = pb.Conv1d(self.i1, self.n1, w1)
+                self.p1 = pb.Probe(self.n1, "feature_map")
+
+        TEST_NAME = self.test_Conv1d_001.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        TEST_CASE_DIR.mkdir(exist_ok=True)
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (8, 100)  # C*L
+        ksize = (4, shape1[0], 8)  # O*C*K
+        out_shape = (4, 93)
+
+        sim_time = 5
+
+        # Prefer a previously saved data file; otherwise fall back to fresh data
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            with np.load(NPZ_FILE) as npz:  # close the archive once the arrays are read
+                weight1 = npz["weight1"]
+                inpdata1 = npz["inpdata1"]
+                refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except Exception:  # no usable data file -> regenerate below
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 12, size=ksize, dtype=np.int8)
+            inpdata1 = FIXED_RNG.integers(
+                0, 1, size=(sim_time,) + shape1, dtype=np.bool_, endpoint=True
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time,) + out_shape, dtype=np.bool_)
+
+        network = Net001(weight1)
+        sim = pb.Simulator(network, start_time_zero=False)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[i, :])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[network.p1][i]
+
+            print(f"t={i + 1}\n", sim.data[network.p1][i])
+
+        # Check
+        # TODO the result of conv1d is supposed to pass to LIF
+        # for i in range(sim_time):
+        #     ref = conv1d_golden(inpdata1[i, :], (out_shape[1],), weight1, (1,), (0,))
+        #     assert np.array_equal(sim.data[network.p1][i], ref)
+
+        #     if USE_EXISTING_DATA:
+        #         assert np.array_equal(ref, refresult1[i, :])
+        #     else:
+        #         refresult1[i, :] = sim.data[network.p1][i]
+
+        #     print(f"t={i+1}\n", ref)
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+
+class TestOnBoard_SemiFoldedOp:
+    def test_Conv2dSemiFolded_001(self):
+        class Net001(pb.DynSysGroup):
+            def __init__(self, w1):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(
+                    self.i1,
+                    w1,
+                    1,
+                    0,
+                    tick_wait_start=1,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_001.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (1, 64, 64)  # C*H*W
+        ksize = (4, shape1[0], 7, 7)  # O*C*K*K
+        out_shape = (4, 58, 58)
+
+        sim_time = 65
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net001(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_Conv2dSemiFolded_002(self):
+        class Net002(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(
+                    input=_out_bypass1, shape_out=shape1[:2]
+                )  # Changed input shape
+                self.conv1 = pb.Conv2dSemiFolded(
+                    self.i1,
+                    w2,
+                    2,  # Changed stride
+                    0,
+                    tick_wait_start=1,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_002.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (8, 64, 64)  # C*H*W
+        ksize = (4, shape1[0], 7, 7)  # O*C*K*K
+        out_shape = (4, 29, 29)
+
+        sim_time = 65
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net002(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_Conv2dSemiFolded_003(self):
+        class Net003(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(
+                    input=_out_bypass1, shape_out=shape1[:2]
+                )  # Changed input shape
+                self.conv1 = pb.Conv2dSemiFolded(
+                    self.i1,
+                    w2,
+                    1,  # Changed stride
+                    1,
+                    tick_wait_start=1,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_003.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (8, 64, 64)  # C*H*W
+        ksize = (4, shape1[0], 3, 3)  # O*C*K*K
+        out_shape = (4, 64, 64)
+
+        sim_time = 65
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net003(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_MaxPool2dSemiFolded_004(self):
+        class Net004(pb.DynSysGroup):
+            def __init__(self, ksize):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.maxpool1 = pb.MaxPool2dSemiFolded(
+                    self.i1,
+                    ksize,
+                    2,
+                    tick_wait_start=1,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_MaxPool2dSemiFolded_004.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (3, 32, 32)  # C*H*W
+        ksize = (2, 2)  # K*K
+        out_shape = (3, 16, 16)
+
+        sim_time = 32
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+
+        try:
+            npz = np.load(NPZ_FILE)
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net004(ksize)
+        maxpool = network.maxpool1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[maxpool][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save input data & reference result
+        if not USE_EXISTING_DATA:
+            np.savez(NPZ_FILE, inpdata1=inpdata1, refresult1=refresult1)
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_AvgPool2dSemiFolded_005(self):
+        class Net005(pb.DynSysGroup):
+            def __init__(self, ksize):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.maxpool1 = pb.AvgPool2dSemiFolded(
+                    self.i1,
+                    ksize,
+                    2,
+                    0,
+                    tick_wait_start=1,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_AvgPool2dSemiFolded_005.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (3, 32, 32)  # C*H*W
+        ksize = (2, 2)  # K*K
+        out_shape = (3, 16, 16)
+
+        sim_time = 32
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+
+        try:
+            npz = np.load(NPZ_FILE)
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net005(ksize)
+        maxpool = network.maxpool1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[maxpool][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save input data & reference result
+        if not USE_EXISTING_DATA:
+            np.savez(NPZ_FILE, inpdata1=inpdata1, refresult1=refresult1)
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_Conv2dSemiFoldedNet_006(self):
+        class Net006(pb.DynSysGroup):
+            def __init__(self, w1, w2, w3):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(
+                    self.i1,
+                    w1,
+                    1,
+                    1,
+                    tick_wait_start=1,
+                )
+
+                self.conv2 = pb.Conv2dSemiFolded(
+                    self.conv1,
+                    w2,
+                    1,
+                    1,
+                    tick_wait_start=3,
+                )
+
+                self.linear1 = pb.LinearSemiFolded(
+                    self.conv2,
+                    out_shape[1],
+                    weights=w3,
+                    bias=2,
+                    conn_type=pb.SynConnType.All2All,
+                    tick_wait_start=5,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFoldedNet_006.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (3, 32, 32)  # C*H*W
+        ksize1 = (4, shape1[0], 3, 3)  # O*C*K*K
+        ksize2 = (4, ksize1[0], 3, 3)
+        out_shape = (4 * 32 * 32, 10)
+
+        sim_time = 40
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            weight2 = npz["weight2"]
+            weight3 = npz["weight3"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(0, 3, size=ksize1, dtype=np.int8)
+            weight2 = FIXED_RNG.integers(-3, 3, size=ksize2, dtype=np.int8)
+            weight3 = FIXED_RNG.integers(-3, 5, size=out_shape, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, out_shape[1]), dtype=NEUOUT_U8_DTYPE)
+
+        network = Net006(weight1, weight2, weight3)
+        conv2d1 = network.conv1
+        conv2d2 = network.conv2
+        linear = network.linear1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe1 = pb.Probe(generated[conv2d1][0], "output")
+        probe2 = pb.Probe(generated[conv2d2][0], "output")
+        probe3 = pb.Probe(generated[linear][0], "output")
+
+        sim.add_probe(probe1)
+        sim.add_probe(probe2)
+        sim.add_probe(probe3)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe3][i]
+
+            print(f"t={i + 1}\n", sim.data[probe3][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE,
+                weight1=weight1,
+                weight2=weight2,
+                weight3=weight3,
+                inpdata1=inpdata1,
+                refresult1=refresult1,
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_Conv2dSemiFoldedNet_007(self):
+        class Net007(pb.DynSysGroup):
+            def __init__(self, w1, w2, w3):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(
+                    self.i1,
+                    w1,
+                    2,
+                    1,
+                    tick_wait_start=1,
+                )
+
+                self.conv2 = pb.Conv2dSemiFolded(
+                    self.conv1,
+                    w2,
+                    2,
+                    1,
+                    tick_wait_start=3,
+                )
+
+                self.linear1 = pb.LinearSemiFolded(
+                    self.conv2,
+                    out_shape[1],
+                    weights=w3,
+                    bias=2,
+                    conn_type=pb.SynConnType.All2All,
+                    tick_wait_start=5,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFoldedNet_007.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (3, 32, 32)  # C*H*W
+        ksize1 = (4, shape1[0], 4, 4)  # O*C*K*K
+        ksize2 = (4, ksize1[0], 4, 4)
+        out_shape = (4 * 8 * 8, 10)
+
+        sim_time = 40
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            weight2 = npz["weight2"]
+            weight3 = npz["weight3"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(0, 3, size=ksize1, dtype=np.int8)
+            weight2 = FIXED_RNG.integers(-3, 3, size=ksize2, dtype=np.int8)
+            weight3 = FIXED_RNG.integers(-3, 5, size=out_shape, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, out_shape[1]), dtype=NEUOUT_U8_DTYPE)
+
+        network = Net007(weight1, weight2, weight3)
+        conv2d1 = network.conv1
+        conv2d2 = network.conv2
+        linear = network.linear1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe1 = pb.Probe(generated[conv2d1][0], "output")
+        probe2 = pb.Probe(generated[conv2d2][0], "output")
+        probe3 = pb.Probe(generated[linear][0], "output")
+
+        sim.add_probe(probe1)
+        sim.add_probe(probe2)
+        sim.add_probe(probe3)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe3][i]
+
+            print(f"t={i + 1}\n", sim.data[probe3][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE,
+                weight1=weight1,
+                weight2=weight2,
+                weight3=weight3,
+                inpdata1=inpdata1,
+                refresult1=refresult1,
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_CNNSemiFoldedNet_008(self):
+        class Net008(pb.DynSysGroup):
+            def __init__(self, w1, w2, w3):
+                super().__init__()
+
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w1, 1, 1, tick_wait_start=1)
+                self.pool1 = pb.MaxPool2dSemiFolded(
+                    self.conv1, (2, 2), 2, tick_wait_start=3
+                )
+                self.conv2 = pb.Conv2dSemiFolded(
+                    self.pool1, w2, 1, 1, tick_wait_start=5
+                )
+                self.pool2 = pb.MaxPool2dSemiFolded(
+                    self.conv2, (2, 2), 2, tick_wait_start=7
+                )
+                self.linear1 = pb.LinearSemiFolded(
+                    self.pool2,
+                    out_shape[1],
+                    weights=w3,
+                    bias=2,
+                    conn_type=pb.SynConnType.All2All,
+                    tick_wait_start=9,
+                )
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_CNNSemiFoldedNet_008.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (3, 32, 32)  # C*H*W
+        ksize1 = (4, shape1[0], 3, 3)  # O*C*K*K
+        ksize2 = (4, ksize1[0], 3, 3)
+        out_shape = (4 * 8 * 8, 10)
+
+        sim_time = 42
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            weight2 = npz["weight2"]
+            weight3 = npz["weight3"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(0, 3, size=ksize1, dtype=np.int8)
+            weight2 = FIXED_RNG.integers(-3, 3, size=ksize2, dtype=np.int8)
+            weight3 = FIXED_RNG.integers(-3, 5, size=out_shape, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros((sim_time, out_shape[1]), dtype=NEUOUT_U8_DTYPE)
+
+        network = Net008(weight1, weight2, weight3)
+        conv2d1 = network.conv1
+        conv2d2 = network.conv2
+        linear = network.linear1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe1 = pb.Probe(generated[conv2d1][0], "output")
+        probe2 = pb.Probe(generated[conv2d2][0], "output")
+        probe3 = pb.Probe(generated[linear][0], "output")
+
+        sim.add_probe(probe1)
+        sim.add_probe(probe2)
+        sim.add_probe(probe3)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe3][i]
+
+            print(f"t={i + 1}\n", sim.data[probe3][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE,
+                weight1=weight1,
+                weight2=weight2,
+                weight3=weight3,
+                inpdata1=inpdata1,
+                refresult1=refresult1,
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+
+if __name__ == "__main__":
+    # NOTE: run test cases by cli
+    # For example:
+    # >>> cd paibox
+    # >>> poetry run python ./tests/onboard/test_onboard.py
+    test_fp = Path.cwd() / Path(__file__)
+    test_class = "TestOnBoard_WRAMMapping"
+    test_case_name = "test_007"
+    # Run a specific test case
+    retcode = pytest.main(["-s", f"{test_fp}::{test_class}::{test_case_name}"])

From 9ed46f3cd9f354e4e2f0f5c28ab8e4af94be8534 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Fri, 25 Oct 2024 17:17:01 +0800
Subject: [PATCH 105/187] =?UTF-8?q?=E2=9C=85=20skip=20an=20xfail=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/onboard/test_onboard.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/onboard/test_onboard.py b/tests/onboard/test_onboard.py
index 89e1eb8d..5d965900 100644
--- a/tests/onboard/test_onboard.py
+++ b/tests/onboard/test_onboard.py
@@ -1291,6 +1291,7 @@ def __init__(self, ksize):
 
         print(f"Test {TEST_NAME} end")
 
+    @pytest.mark.xfail(reason="A ValidationError will be raised due to the backend not support.")
     def test_Conv2dSemiFoldedNet_006(self):
         class Net006(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):

From d8a009453a8c52c7e35b216c7180e60e148fbf64 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 25 Oct 2024 09:52:54 +0000
Subject: [PATCH 106/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/backend/conf_exporting.py     |   2 +-
 paibox/backend/conf_types.py         |   6 +-
 paibox/backend/graphs.py             |  11 ++-
 paibox/backend/mapper.py             |   7 +-
 paibox/backend/placement.py          |  23 +++--
 paibox/backend/routing.py            | 141 ++++++++++++++++-----------
 paibox/backend/types.py              |  19 ++--
 paibox/components/synapses/base.py   |   8 +-
 tests/backend/test_conf_exporting.py |  11 ++-
 tests/backend/test_graphs.py         |   6 +-
 tests/backend/test_placement.py      |   9 +-
 tests/onboard/.gitignore             |   2 +-
 tests/onboard/README.md              |   6 +-
 tests/onboard/test_onboard.py        |  47 ++++-----
 14 files changed, 177 insertions(+), 121 deletions(-)

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
index ac478fb1..eee50e45 100644
--- a/paibox/backend/conf_exporting.py
+++ b/paibox/backend/conf_exporting.py
@@ -8,7 +8,6 @@
 from paicorelib.framelib import OfflineFrameGen
 from paicorelib.framelib.utils import _mask, np2bin, np2npy, np2txt
 
-from .placement import CorePlacement
 from paibox.components import Neuron
 from paibox.utils import reverse_8bit
 
@@ -23,6 +22,7 @@
     OutputDestConf,
 )
 from .context import _BACKEND_CONTEXT
+from .placement import CorePlacement
 from .types import _RID_UNSET
 
 if _USE_ORJSON:
diff --git a/paibox/backend/conf_types.py b/paibox/backend/conf_types.py
index 75d86171..5259b1e5 100644
--- a/paibox/backend/conf_types.py
+++ b/paibox/backend/conf_types.py
@@ -16,8 +16,11 @@
     NeuronConf,
     NeuronDestInfo,
     ParamsReg,
+    SNNModeEnable,
+    SpikeWidthFormat,
+    WeightWidth,
+    get_replication_id,
 )
-from paicorelib import SNNModeEnable, SpikeWidthFormat, WeightWidth, get_replication_id
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias
@@ -25,6 +28,7 @@
     from typing_extensions import TypeAlias
 
 from paibox.components import Neuron
+
 from .types import AxonCoord, NeuSegment, NodeName, WRAMPackedType
 
 try:
diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index c2609326..80c553cc 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -221,13 +221,16 @@ def graph_partition(self) -> list[RouteGroup]:
                 succ_nodes = [self._raw_nodes[n] for n in succ_node_names]
                 succ_edges = [self.succ_dg[node][n.name].edge for n in succ_nodes]
                 groups.append(SuccGroup(succ_nodes, succ_edges, self._raw_nodes[node]))
-        
-        #并查集过程，将所有分组相互合并，合并条件是两个分组有交集，合并结果用RouteGroup表示
+
+        # 并查集过程，将所有分组相互合并，合并条件是两个分组有交集，合并结果用RouteGroup表示
         route_groups: list[RouteGroup] = list()
         visited = set()
+
         def dfs(group: SuccGroup, visited: set[SuccGroup], route_group: RouteGroup):
             for other_group in groups:
-                if other_group not in visited and not set(group.nodes).isdisjoint(other_group.nodes):
+                if other_group not in visited and not set(group.nodes).isdisjoint(
+                    other_group.nodes
+                ):
                     visited.add(other_group)
                     route_group.add_group(other_group)
                     dfs(other_group, visited, route_group)
@@ -239,7 +242,7 @@ def dfs(group: SuccGroup, visited: set[SuccGroup], route_group: RouteGroup):
                 visited.add(group)
                 dfs(group, visited, route_group)
                 route_groups.append(route_group)
-        
+
         return route_groups
 
     def multicast_optim(
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 13e54966..06bcc3c2 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -11,6 +11,7 @@
 from paibox.exceptions import ConfigInvalidError, ResourceError
 from paibox.network import DynSysGroup
 
+from .conf_exporting import *
 from .conf_types import (
     CoreConf,
     CorePlmConf,
@@ -20,7 +21,6 @@
     InputNodeConf,
     OutputDestConf,
 )
-from .conf_exporting import *
 from .context import _BACKEND_CONTEXT, set_cflag
 from .graphs import (
     PAIGraph,
@@ -208,11 +208,11 @@ def untwist_branch_nodes(self) -> None:
     def build_core_blocks(self) -> None:
         """Build core blocks based on partitioned edges."""
         route_groups = self.graph.graph_partition()
-        
+
         for route_group in route_groups:
             route_group.dump()
             self.routing_groups.append(RoutingGroup(route_group))
-        
+
         for rg in self.routing_groups:
             self.core_blocks.extend(rg.core_blocks)
 
@@ -249,7 +249,6 @@ def build_core_blocks(self) -> None:
                     if cb in _rg:
                         self.succ_routing_groups[rg].append(_rg)
                         break
-        
 
     def lcn_ex_adjustment(self) -> None:
         """Adjust the LCN of each core block & set target LCN."""
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index f6ed26e8..ec24cb57 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -9,33 +9,38 @@
 from paicorelib.framelib import OfflineFrameGen
 
 from paibox.components import FullConnectedSyn, Neuron
-from paibox.exceptions import GraphBuildError, ResourceError, TruncationWarning, NotSupportedError
+from paibox.exceptions import (
+    GraphBuildError,
+    NotSupportedError,
+    ResourceError,
+    TruncationWarning,
+)
 from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
 
 from .conf_types import CoreConfig, CoreConfInChip, CorePlmConfig, NeuronConfig
-from .context import _BACKEND_CONTEXT
 from .constrs import GraphNodeConstrs
+from .context import _BACKEND_CONTEXT
 from .segment_utils import aligned_coords, get_axon_segments, get_neu_segments
 from .types import (
     _COORD_UNSET,
     _RID_UNSET,
+    N_BIT_PACKED_WEIGHT,
     WRAM_PACKED_DTYPE,
     WRAM_UNPACKED_DTYPE,
-    N_BIT_PACKED_WEIGHT,
     AxonCoord,
     AxonSegment,
     CoreAbstract,
     DestNodeType,
+    EdgeType,
     NeuSegment,
     NeuSegOfCoreBlock,
     NeuSegOfCorePlm,
+    RouteGroup,
     SourceNodeType,
     WRAMPackedType,
     WRAMUnpackedType,
     is_iw8,
-    RouteGroup,
-    EdgeType,
 )
 
 
@@ -387,21 +392,21 @@ def build(
 
     @classmethod
     def build_core_blocks(cls, route_group: RouteGroup) -> list["CoreBlock"]:
-        core_blocks:list[CoreBlock] = []
+        core_blocks: list[CoreBlock] = []
         succ_nodes = list(route_group.nodes)
         mode = succ_nodes[0].mode
-        if any (node.mode != mode for node in succ_nodes):
+        if any(node.mode != mode for node in succ_nodes):
             raise NotSupportedError("mixed mode is not supported.")
         idx_of_sg = GraphNodeConstrs.tick_wait_attr_constr(succ_nodes)
         route_group.set_inputs()
         if len(idx_of_sg) == 0:
             idx_of_sg = [list(range(len(succ_nodes)))]
-            
+
         for idx in idx_of_sg:
             succ_edges: set[EdgeType] = set()
             for i in idx:
                 succ_edges.update(route_group.inputs[succ_nodes[i]])
-            core_block = CoreBlock.build(*succ_edges, routing_id = 0, rt_mode = mode)
+            core_block = CoreBlock.build(*succ_edges, routing_id=0, rt_mode=mode)
             core_blocks.append(core_block)
         return core_blocks
 
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 08a2bc0a..127c4831 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -11,14 +11,15 @@
 from paicorelib import get_routing_consumption
 from paicorelib.routing_defs import MAX_ROUTING_PATH_LENGTH
 
-from paibox.exceptions import ResourceError, RoutingError
+from paibox.exceptions import GraphBuildError, ResourceError, RoutingError
 
 from .conf_types import CorePlmConfInChip
-from paibox.exceptions import ResourceError, RoutingError, GraphBuildError
 from .placement import CoreBlock, CorePlacement, EmptyCorePlacement
 from .types import *
+
 __all__ = ["RoutingGroup", "RoutingRoot"]
 
+
 def Coord2RoutingCoord(coord: Coord) -> RoutingCoord:
     directions: list[Direction] = []
     x = coord.x
@@ -30,7 +31,8 @@ def Coord2RoutingCoord(coord: Coord) -> RoutingCoord:
         value_x = (x >> shift) & 0b1  # 取出当前位的值
         value_y = (y >> shift) & 0b1
         directions.append(Direction((value_x, value_y)))
-    return RoutingCoord(*directions)    
+    return RoutingCoord(*directions)
+
 
 class RoutingCluster:
     def __init__(
@@ -471,75 +473,99 @@ def routing_coord(self) -> RoutingCoord:
 
         return RoutingCoord(*reversed(path))
 
+
 # each sub routing group should be able to route by single coord
 class SubRoutingGroup:
     index = 0
-    def __init__(self, unorder_elements: list[Union[CoreBlock,"SubRoutingGroup"]], ordered_elements: list["SubRoutingGroup"]) -> None:
-        self.unorder_elements:list[Union[CoreBlock,"SubRoutingGroup"]] = unorder_elements
-        self.ordered_elements:list["SubRoutingGroup"] = ordered_elements
-        self.routing_elements:list[Union[CoreBlock,"SubRoutingGroup"]] = unorder_elements + ordered_elements
-        self.offset:list[int] = list()
-        self.n_core_required:int = 0
-        self.tail_wasted:int = 0
+
+    def __init__(
+        self,
+        unorder_elements: list[Union[CoreBlock, "SubRoutingGroup"]],
+        ordered_elements: list["SubRoutingGroup"],
+    ) -> None:
+        self.unorder_elements: list[Union[CoreBlock, "SubRoutingGroup"]] = (
+            unorder_elements
+        )
+        self.ordered_elements: list["SubRoutingGroup"] = ordered_elements
+        self.routing_elements: list[Union[CoreBlock, "SubRoutingGroup"]] = (
+            unorder_elements + ordered_elements
+        )
+        self.offset: list[int] = list()
+        self.n_core_required: int = 0
+        self.tail_wasted: int = 0
         self.name = f"SubRoutingGroup[{SubRoutingGroup.index}]"
-        axons:set[SourceNodeType] = set()
+        axons: set[SourceNodeType] = set()
         for element in self.routing_elements:
             axons.update(element.axons)
-        self.axons:list[SourceNodeType] = list(axons)
+        self.axons: list[SourceNodeType] = list(axons)
         SubRoutingGroup.index += 1
-    
+
     def set_config(self):
         for element in self.routing_elements:
             if isinstance(element, SubRoutingGroup):
                 element.set_config()
 
         # unorder elements sorted from big to small, avoiding assigning waste.
-        unorder_elements = sorted(self.unorder_elements, key=lambda x: x.n_core_required, reverse=True)
+        unorder_elements = sorted(
+            self.unorder_elements, key=lambda x: x.n_core_required, reverse=True
+        )
         ordered_elements = self.ordered_elements
         for element in unorder_elements:
             n_core_required = element.n_core_required
             self.offset.append(self.n_core_required)
             self.n_core_required += n_core_required
-        
+
         # ordered elements should be assgined first
         for element in ordered_elements:
             n_core_required = element.n_core_required
-            n_core_assigned = _nearest_multiple_above(self.n_core_required, n_core_required)
+            n_core_assigned = _nearest_multiple_above(
+                self.n_core_required, n_core_required
+            )
             self.offset.append(n_core_assigned)
             self.n_core_required = n_core_assigned + n_core_required
-        
-        #routing elements should satisfy topological order
-        self.routing_elements:list[Union[CoreBlock,"SubRoutingGroup"]] = unorder_elements + ordered_elements
-        
-        
-        sub_tail_wasted = 0 if isinstance(self.routing_elements[-1], CoreBlock) else self.routing_elements[-1].tail_wasted
+
+        # routing elements should satisfy topological order
+        self.routing_elements: list[Union[CoreBlock, "SubRoutingGroup"]] = (
+            unorder_elements + ordered_elements
+        )
+
+        sub_tail_wasted = (
+            0
+            if isinstance(self.routing_elements[-1], CoreBlock)
+            else self.routing_elements[-1].tail_wasted
+        )
         assigned_n_core_required = 1 << (self.n_core_required - 1).bit_length()
-        self.tail_wasted += assigned_n_core_required - self.n_core_required + sub_tail_wasted
+        self.tail_wasted += (
+            assigned_n_core_required - self.n_core_required + sub_tail_wasted
+        )
         self.n_core_required = assigned_n_core_required
-        
-        
+
     # return Coord that wasted in subrouting group
-    def assign(self, allocated: list[Coord], chip_coord: Coord) -> tuple[list[Coord], list[Coord]]:
+    def assign(
+        self, allocated: list[Coord], chip_coord: Coord
+    ) -> tuple[list[Coord], list[Coord]]:
         cur_i = 0
-        assigned_coords:list[Coord] = []
-        wasted_coords:list[Coord] = []
+        assigned_coords: list[Coord] = []
+        wasted_coords: list[Coord] = []
         for element, offset in zip(self.routing_elements, self.offset):
             if offset > cur_i:
-                wasted_coords = wasted_coords + allocated[cur_i : offset]
+                wasted_coords = wasted_coords + allocated[cur_i:offset]
             cur_i = offset
-            
+
             n = element.n_core_required
-            print(f"element: {element.name}, {n} cores, start at {Coord2RoutingCoord(allocated[cur_i])}")
+            print(
+                f"element: {element.name}, {n} cores, start at {Coord2RoutingCoord(allocated[cur_i])}"
+            )
             assigned, wasted = element.assign(allocated[cur_i : cur_i + n], chip_coord)
             assigned_coords = assigned_coords + assigned
             wasted_coords = wasted_coords + wasted
             cur_i += n
         return assigned_coords, wasted_coords + allocated[cur_i:]
-    
+
     # use list to keep the order of axons
     def group_axons(self, multicast_axons: list[SourceNodeType]) -> None:
         private_multicast_axons = multicast_axons.copy()
-        axons_count:list[int] = [0] * len(self.axons)
+        axons_count: list[int] = [0] * len(self.axons)
         for element in self.routing_elements:
             for axon in element.axons:
                 idx = self.axons.index(axon)
@@ -547,10 +573,10 @@ def group_axons(self, multicast_axons: list[SourceNodeType]) -> None:
         for i, axon in enumerate(self.axons):
             if axons_count[i] > 1 and axon not in private_multicast_axons:
                 private_multicast_axons.append(axon)
-        
+
         for element in self.routing_elements:
             element.group_axons(private_multicast_axons)
-        
+
     @property
     def core_blocks(self) -> list[CoreBlock]:
         cbs = []
@@ -560,10 +586,10 @@ def core_blocks(self) -> list[CoreBlock]:
             else:
                 cbs += element.core_blocks
         return cbs
-    
+
     @classmethod
     def build(cls, route_group: RouteGroup) -> "SubRoutingGroup":
-        
+
         if len(route_group.nodes) == 0:
             return None
         sub_group = RouteGroup()
@@ -573,26 +599,28 @@ def build(cls, route_group: RouteGroup) -> "SubRoutingGroup":
                 sub_group.add_group(group)
             else:
                 remaining_group.add_group(group)
-                
+
         remaining_group.nodes = remaining_group.nodes - sub_group.nodes
-        unorder_elements:list[CoreBlock] = CoreBlock.build_core_blocks(remaining_group)
-        ordered_elements:list[SubRoutingGroup] = []
+        unorder_elements: list[CoreBlock] = CoreBlock.build_core_blocks(remaining_group)
+        ordered_elements: list[SubRoutingGroup] = []
         sub_routing_group: SubRoutingGroup = SubRoutingGroup.build(sub_group)
         if sub_routing_group is not None:
             ordered_elements = [sub_routing_group]
         return cls(unorder_elements, ordered_elements)
-    
-    def dump(self, i:int = 0):
+
+    def dump(self, i: int = 0):
         tabs = "\t" * i
         print(f"{tabs}SubRoutingGroup: {self.name} with {self.n_core_required} cores:")
         for element in self.routing_elements:
             if isinstance(element, SubRoutingGroup):
-                element.dump(i+1)
+                element.dump(i + 1)
             else:
                 print(f"{tabs}\t{element.name} with {element.n_core_required} cores:")
                 for edge in element._parents:
-                    print(f"{tabs}\t\t{edge.name}: {edge.source.name} -> {edge.target.name}")
-                
+                    print(
+                        f"{tabs}\t\t{edge.name}: {edge.source.name} -> {edge.target.name}"
+                    )
+
 
 class RoutingGroup:
     """Core blocks located within a routing group are routable.
@@ -600,7 +628,7 @@ class RoutingGroup:
     NOTE: Axon groups within a routing group are the same.
     """
 
-    def __init__(self, route_group:RouteGroup) -> None:
+    def __init__(self, route_group: RouteGroup) -> None:
         self.sub_routing_group: SubRoutingGroup = SubRoutingGroup.build(route_group)
         self.core_blocks = self.sub_routing_group.core_blocks
         self.assigned_coords: list[Coord] = []
@@ -611,10 +639,10 @@ def __init__(self, route_group:RouteGroup) -> None:
         """Wasted core placements"""
         self.sub_n_core_wasted = 0
 
-    def assign(
-        self, allocated: list[Coord], chip_coord: Coord
-    ) -> None:
-        print(f"route_group: {self.sub_routing_group.name} assigned from {Coord2RoutingCoord(allocated[0])}")
+    def assign(self, allocated: list[Coord], chip_coord: Coord) -> None:
+        print(
+            f"route_group: {self.sub_routing_group.name} assigned from {Coord2RoutingCoord(allocated[0])}"
+        )
         assigned, wasted = self.sub_routing_group.assign(allocated, chip_coord)
         self.assigned_coords = assigned
         self.wasted_coords = wasted
@@ -645,7 +673,7 @@ def n_core_required(self) -> int:
     @property
     def n_core_cost(self) -> int:
         return self.sub_routing_group.n_core_required
-    
+
     @property
     def tail_wasted(self) -> int:
         return self.sub_routing_group.tail_wasted
@@ -681,8 +709,7 @@ def group_axons(self) -> None:
             if not cb._lcn_locked:
                 raise GraphBuildError("get axon segments after 'lcn_ex' is locked.")
         self.sub_routing_group.group_axons([])
-        
-        
+
 
 @final
 class RoutingRoot:
@@ -753,16 +780,16 @@ def place_routing_group(self, routing_group: RoutingGroup) -> None:
         """Place a routing group in the chip list. Assign each core blocks with routing coordinates &   \
             make sure they are routable.
         """
-        print(f"Routing Group:")
+        print("Routing Group:")
         for cb in routing_group:
             print(f"\t{cb.name}")
-        
+
         n_core_cost = routing_group.n_core_cost
         tail_wasted = routing_group.tail_wasted
         n_core_req = n_core_cost - tail_wasted
         print(f"\tcost: {n_core_cost}, tail_wasted: {tail_wasted}")
 
-        if  n_core_req > HwConfig.N_CORE_OFFLINE:
+        if n_core_req > HwConfig.N_CORE_OFFLINE:
             raise ResourceError(
                 "the number of cores required by the routing group exceeds the hardware limit, "
                 f"{n_core_req} > {HwConfig.N_CORE_OFFLINE}."
@@ -771,7 +798,7 @@ def place_routing_group(self, routing_group: RoutingGroup) -> None:
         core_insert_loc, chip_idx_loc, rpath_start = self.get_insert_location(
             n_core_cost, tail_wasted
         )
-        allocated_coords:list[Coord] = []
+        allocated_coords: list[Coord] = []
 
         for i, rpath in _routing_path_generator(n_core_cost, rpath_start):
             leaf_coord = RoutingCoord(*reversed(rpath))
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index a398a49f..91f7ed70 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -14,6 +14,7 @@
 
 from paicorelib import Coord, CoreMode
 from paicorelib import ReplicationId as RId
+
 from paibox.base import PAIBoxObject
 from paibox.components import FullConnectedSyn, InputProj, Neuron
 
@@ -105,23 +106,25 @@ class PartitionedEdges(NamedTuple):
 
 NeuSlice: TypeAlias = slice
 
+
 class SuccGroup:
     # edge for input to nodes[i] is edges[i]
-    def __init__(self, nodes:list[NodeType], edges:list[EdgeType], input:NodeType):
+    def __init__(self, nodes: list[NodeType], edges: list[EdgeType], input: NodeType):
         self.nodes = nodes
         self.edges = edges
         self.input = input
 
+
 class RouteGroup:
     def __init__(self):
         self.groups: list[SuccGroup] = list()
         self.nodes: set[NodeType] = set()
         self.inputs: dict[NodeType, list[EdgeType]] = dict()
-        
-    def add_group(self, group:SuccGroup):
+
+    def add_group(self, group: SuccGroup):
         self.groups.append(group)
         self.nodes.update(group.nodes)
-    
+
     def set_inputs(self):
         for group in self.groups:
             for node, edge in zip(group.nodes, group.edges):
@@ -129,16 +132,20 @@ def set_inputs(self):
                     self.inputs[node] = list()
                 assert edge.dest.name == node.name
                 self.inputs[node].append(edge)
-    
+
     def dump(self):
         print("RouteGroup:")
         for group in self.groups:
             print(f"\tGroup: of {group.input.name}")
             for node, edge in zip(group.nodes, group.edges):
-                print(f"\t\tnode: {node.name}, edge: {edge.name}: {edge.source.name} -> {edge.dest.name}")
+                print(
+                    f"\t\tnode: {node.name}, edge: {edge.name}: {edge.source.name} -> {edge.dest.name}"
+                )
         print("\tNodes:")
         for node in self.nodes:
             print(f"\t\tnode: {node.name}")
+
+
 @dataclass(frozen=True)
 class NeuSegment:
     target: DestNodeType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 55c17221..a444023a 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -80,9 +80,13 @@ def update(self, x: Optional[NeuOutType] = None, *args, **kwargs) -> SynOutType:
         else:
             # Retrieve 0 to the dest neurons if it is not working
             if isinstance(self.source, InputProj):
-                synin = np.zeros_like(self.source.output if x is None else np.atleast_1d(x))
+                synin = np.zeros_like(
+                    self.source.output if x is None else np.atleast_1d(x)
+                )
             else:
-                synin = np.zeros_like(self.source.delay_registers[0] if x is None else np.atleast_1d(x))
+                synin = np.zeros_like(
+                    self.source.delay_registers[0] if x is None else np.atleast_1d(x)
+                )
 
         self._synout = self.comm(synin).ravel()
 
diff --git a/tests/backend/test_conf_exporting.py b/tests/backend/test_conf_exporting.py
index f120912c..143c4c0f 100644
--- a/tests/backend/test_conf_exporting.py
+++ b/tests/backend/test_conf_exporting.py
@@ -1,10 +1,13 @@
 import random
+
 import numpy as np
 import pytest
-import paibox as pb
-
-from paicorelib import Coord, CoordOffset, CoreMode, HwConfig, LCN_EX, MaxPoolingEnable
+from paicorelib import LCN_EX, Coord, CoordOffset, CoreMode, HwConfig, MaxPoolingEnable
 from paicorelib import WeightWidth as WW
+from paicorelib.reg_model import TICK_WAIT_END_MAX, TICK_WAIT_START_MAX
+
+import paibox as pb
+from paibox.backend.conf_exporting import *
 from paibox.backend.conf_types import (
     CoreConfig,
     CorePlmConfig,
@@ -12,9 +15,7 @@
     NeuronConfig,
     NeuronDestInfo,
 )
-from paibox.backend.conf_exporting import *
 from paibox.backend.types import AxonCoord, NeuSegment
-from paicorelib.reg_model import TICK_WAIT_END_MAX, TICK_WAIT_START_MAX
 
 from .conftest import gen_random_used_lx
 
diff --git a/tests/backend/test_graphs.py b/tests/backend/test_graphs.py
index fb1fddd5..3cd372f6 100644
--- a/tests/backend/test_graphs.py
+++ b/tests/backend/test_graphs.py
@@ -262,14 +262,16 @@ def test_untwist_branch_nodes1(
         mapper.compile(no_twisted_branch=no_twisted_branch)
 
         mapper.export(fp=ensure_dump_dir)
-        
+
         if no_twisted_branch:
             assert (
                 len(mapper.graph.nodes)
                 == len(net.nodes(level=1).include(Neuron, pb.InputProj)) + net.n_copy
             )
         else:
-            assert len(mapper.graph.nodes) == len(net.nodes(level=1).include(Neuron, pb.InputProj))
+            assert len(mapper.graph.nodes) == len(
+                net.nodes(level=1).include(Neuron, pb.InputProj)
+            )
 
 
 class TestGroupEdges:
diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index a9eec14a..1d0c88e7 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -1,14 +1,15 @@
 import math
-import numpy as np
-import pytest
 import sys
 from contextlib import nullcontext
 from functools import partial
-from paicorelib import Coord, LCN_EX, HwConfig, NeuronAttrs
+from typing import Literal, Optional
+
+import numpy as np
+import pytest
+from paicorelib import LCN_EX, Coord, HwConfig, NeuronAttrs
 from paicorelib import ReplicationId as RId
 from paicorelib import WeightWidth as WW
 from paicorelib.framelib import OfflineFrameGen
-from typing import Literal, Optional
 
 import paibox as pb
 from paibox.backend.placement import CorePlacement
diff --git a/tests/onboard/.gitignore b/tests/onboard/.gitignore
index 9530332c..5d1942c1 100644
--- a/tests/onboard/.gitignore
+++ b/tests/onboard/.gitignore
@@ -1,2 +1,2 @@
 config
-data
\ No newline at end of file
+data
diff --git a/tests/onboard/README.md b/tests/onboard/README.md
index fbbfb8fa..c79cb5a8 100644
--- a/tests/onboard/README.md
+++ b/tests/onboard/README.md
@@ -61,7 +61,7 @@
 测试设置：
 
 - 两层Linear，均为8bit权重，扇入扩展E1=4，L1尺寸(200,200)；E2=4，L2尺寸(200,10)
-- L2层 `bias=2`，``bit_trunc=9``
+- L2层 `bias=2`，`bit_trunc=9`
 - 设置L1 `tws=1`，`twe=0`；L2 `tws=2`，`twe=0`
 - 运行5时间步，监测L2输出
 
@@ -85,7 +85,7 @@
 
 ### 有神经元在WRAM
 
-#### 007 单层W=4E=1 *芯片不输出
+#### 007 单层W=4E=1 \*芯片不输出
 
 测试设置：
 
@@ -98,7 +98,7 @@
 1. 在输出数据中，每个时刻均有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等
 
-#### 008 多层-1 *芯片不输出
+#### 008 多层-1 \*芯片不输出
 
 测试设置：
 
diff --git a/tests/onboard/test_onboard.py b/tests/onboard/test_onboard.py
index 5d965900..f1cf28d7 100644
--- a/tests/onboard/test_onboard.py
+++ b/tests/onboard/test_onboard.py
@@ -1,10 +1,11 @@
+from pathlib import Path
+
+import numpy as np
+import pytest
+
 import paibox as pb
 from paibox.components.neuron.base import MetaNeuron
 from paibox.types import NEUOUT_U8_DTYPE, VOLTAGE_DTYPE, NeuOutType, VoltageType
-import pytest
-import numpy as np
-from pathlib import Path
-
 from tests.components.utils import conv1d_golden
 
 TEST_DIR = Path(__file__).parent
@@ -50,7 +51,7 @@ def __init__(self, w):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -138,7 +139,7 @@ def __init__(self, w):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -219,7 +220,7 @@ def __init__(self, w):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -302,7 +303,7 @@ def __init__(self, w):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -396,7 +397,7 @@ def __init__(self, w1, w2):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -510,7 +511,7 @@ def __init__(self, w1, w2):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -619,7 +620,7 @@ def __init__(self, w1):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -709,7 +710,7 @@ def __init__(self, w1, w2):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -817,7 +818,7 @@ def __init__(self, w1):
         sim_time = 5
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -908,7 +909,7 @@ def __init__(self, w1):
         sim_time = 65
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -993,7 +994,7 @@ def __init__(self, w2):
         sim_time = 65
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -1078,7 +1079,7 @@ def __init__(self, w2):
         sim_time = 65
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -1160,7 +1161,7 @@ def __init__(self, ksize):
         sim_time = 32
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
 
         try:
             npz = np.load(NPZ_FILE)
@@ -1240,7 +1241,7 @@ def __init__(self, ksize):
         sim_time = 32
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
 
         try:
             npz = np.load(NPZ_FILE)
@@ -1291,7 +1292,9 @@ def __init__(self, ksize):
 
         print(f"Test {TEST_NAME} end")
 
-    @pytest.mark.xfail(reason="A ValidationError will be raised due to the backend not support.")
+    @pytest.mark.xfail(
+        reason="A ValidationError will be raised due to the backend not support."
+    )
     def test_Conv2dSemiFoldedNet_006(self):
         class Net006(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
@@ -1339,7 +1342,7 @@ def __init__(self, w1, w2, w3):
         sim_time = 40
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
 
         try:
             npz = np.load(NPZ_FILE)
@@ -1455,7 +1458,7 @@ def __init__(self, w1, w2, w3):
         sim_time = 40
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
         try:
             npz = np.load(NPZ_FILE)
             weight1 = npz["weight1"]
@@ -1566,7 +1569,7 @@ def __init__(self, w1, w2, w3):
         sim_time = 42
 
         USE_EXISTING_DATA = False
-        NPZ_FILE = TEST_CASE_DIR / f"data.npz"
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
 
         try:
             npz = np.load(NPZ_FILE)

From 249eeabc5b97fe717375a2607dd8a85b2b5d168f Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Sat, 26 Oct 2024 14:01:26 +0800
Subject: [PATCH 107/187] =?UTF-8?q?=F0=9F=9A=9A=20optim:=20rename=20`Route?=
 =?UTF-8?q?Group`=20to=20`MergedSuccGroup`=20&=20optimized=20graph=20parti?=
 =?UTF-8?q?tioning?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py | 140 +++++++++++++++++++--------------------
 paibox/backend/types.py  |  49 +++++++++-----
 2 files changed, 102 insertions(+), 87 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 80c553cc..cca7d0b0 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -12,7 +12,6 @@
 from paibox.network import DynSysGroup
 from paibox.utils import check_elem_unique
 
-from .constrs import GraphNodeConstrs
 from .context import _BACKEND_CONTEXT
 from .placement import CoreBlock
 from .routing import RoutingGroup
@@ -60,8 +59,6 @@ class PAIGraph:
     """Status options"""
     has_built: bool = field(default=False)
 
-    # node_constrs: GraphNodeConstrs = field(default_factory=GraphNodeConstrs)
-
     def clear(self, total: bool = True) -> None:
         """Clear the PAIGraph."""
         self.has_built = False
@@ -213,34 +210,36 @@ def build_check(self) -> None:
         if not self.has_built:
             raise GraphBuildError("the graph hasn't been built yet.")
 
-    def graph_partition(self) -> list[RouteGroup]:
-        groups: list[SuccGroup] = list()
+    def graph_partition(self) -> list[MergedSuccGroup]:
+        """Graph partition."""
+        # Build the SuccGroup for each node in the graph.
+        succ_groups: list[SuccGroup] = []
         for node in self.ordered_nodes:
             succ_node_names = set(self.succ_dg[node].keys())
-            if len(succ_node_names) > 0:
+            if succ_node_names:
                 succ_nodes = [self._raw_nodes[n] for n in succ_node_names]
                 succ_edges = [self.succ_dg[node][n.name].edge for n in succ_nodes]
-                groups.append(SuccGroup(succ_nodes, succ_edges, self._raw_nodes[node]))
-
-        # 并查集过程，将所有分组相互合并，合并条件是两个分组有交集，合并结果用RouteGroup表示
-        route_groups: list[RouteGroup] = list()
-        visited = set()
+                succ_groups.append(
+                    SuccGroup(self._raw_nodes[node], succ_nodes, succ_edges)
+                )
 
-        def dfs(group: SuccGroup, visited: set[SuccGroup], route_group: RouteGroup):
-            for other_group in groups:
-                if other_group not in visited and not set(group.nodes).isdisjoint(
-                    other_group.nodes
+        def dfs(sgrp: SuccGroup, rgrp: MergedSuccGroup) -> None:
+            # Union-find sets. If the nodes of two succ_groups have intersection, merge them.
+            for other_sgrp in succ_groups:
+                if other_sgrp not in visited and not set(sgrp.nodes).isdisjoint(
+                    other_sgrp.nodes
                 ):
-                    visited.add(other_group)
-                    route_group.add_group(other_group)
-                    dfs(other_group, visited, route_group)
-
-        for group in groups:
-            if group not in visited:
-                route_group = RouteGroup()
-                route_group.add_group(group)
-                visited.add(group)
-                dfs(group, visited, route_group)
+                    visited.add(other_sgrp)
+                    rgrp.add_group(other_sgrp)
+                    dfs(other_sgrp, rgrp)
+
+        route_groups: list[MergedSuccGroup] = []
+        visited: set[SuccGroup] = set()
+        for sgrp in succ_groups:
+            if sgrp not in visited:
+                route_group = MergedSuccGroup(sgrp)
+                visited.add(sgrp)
+                dfs(sgrp, route_group)
                 route_groups.append(route_group)
 
         return route_groups
@@ -418,31 +417,32 @@ def _copy_succ_conn(
                     f"not all nodes in 'grab_pred_nodes' are in node {node.name}'s predecessors. "
                     f"Got {', '.join(grab_pred_nodes)}, but predecessors are {', '.join(pred_nodes)}."
                 )
-            else:
-                if copied.name not in self.pred_dg.keys():
-                    self.pred_dg[copied.name] = dict()
-                for pred_nn in grab_pred_nodes:
-                    pred_edge = pred_nodes[pred_nn].edge
-                    pred_edge.target = copied
-                    # If don't _update_graph(), update partial information:
-                    # 1. Remove the original connection & add the copied node & the edge
-                    # with the modified target to succ_nodes_dict & succ_dg.
-                    # 2. Update the in-degree of copied node = len(grab).
-                    # 3. The out-degree of predecessors keep the same.
-                    # 1/2/3 -> A -> ...
-                    # ---
-                    # 1     -> A -> ...
-                    # 2/3   -> A'-> ...
-                    if not update:
-                        _orig_edge_attr = self.succ_dg[pred_nn].pop(node.name)
-                        new_edge_attr = EdgeAttr(pred_edge, _orig_edge_attr.distance)
-
-                        self.succ_dg[pred_nn][copied.name] = new_edge_attr
-                        self.pred_dg[copied.name][pred_nn] = new_edge_attr
 
+            if copied.name not in self.pred_dg.keys():
+                self.pred_dg[copied.name] = dict()
+
+            for pred_nn in grab_pred_nodes:
+                pred_edge = pred_nodes[pred_nn].edge
+                pred_edge.target = copied
+                # If don't _update_graph(), update partial information:
+                # 1. Remove the original connection & add the copied node & the edge
+                # with the modified target to succ_nodes_dict & succ_dg.
+                # 2. Update the in-degree of copied node = len(grab).
+                # 3. The out-degree of predecessors keep the same.
+                # 1/2/3 -> A -> ...
+                # ---
+                # 1     -> A -> ...
+                # 2/3   -> A'-> ...
                 if not update:
-                    self.degree_of_nodes[node.name].in_degree -= len(grab_pred_nodes)
-                    self.degree_of_nodes[copied.name].in_degree = len(grab_pred_nodes)
+                    _orig_edge_attr = self.succ_dg[pred_nn].pop(node.name)
+                    new_edge_attr = EdgeAttr(pred_edge, _orig_edge_attr.distance)
+
+                    self.succ_dg[pred_nn][copied.name] = new_edge_attr
+                    self.pred_dg[copied.name][pred_nn] = new_edge_attr
+
+            if not update:
+                self.degree_of_nodes[node.name].in_degree -= len(grab_pred_nodes)
+                self.degree_of_nodes[copied.name].in_degree = len(grab_pred_nodes)
 
         if keep_succ_conn:
             orig_oud = self.degree_of_nodes[node.name].out_degree
@@ -456,30 +456,30 @@ def _copy_succ_conn(
                     f"not all nodes in 'grab_succ_nodes' are in node {node.name}'s successors."
                     f"Got {', '.join(grab_succ_nodes)}, but successors are {', '.join(succ_nodes)}."
                 )
-            else:
-                for succ_nn in grab_succ_nodes:
-                    succ_edge = succ_nodes[succ_nn].edge
-                    succ_edge.source = copied
-                    # If don't _update_graph(), update partial information:
-                    # 1. Remove the original connection & add the copied node & the edge
-                    # with the modified target to succ_dg.
-                    # 2. Update the out-degree of copied node = len(grab).
-                    # 3. The in-degree of successors keep the same.
-                    # ... -> A -> 1/2/3
-                    # ---
-                    # ... -> A -> 1
-                    # ... -> A'-> 2/3
-                    if not update:
-                        self.succ_dg[node.name].pop(succ_nn)
-                        _orig_edge_attr = self.pred_dg[succ_nn].pop(node.name)
-                        new_edge_attr = EdgeAttr(succ_edge, _orig_edge_attr.distance)
-                        self.succ_dg[copied.name] = {succ_nn: new_edge_attr}
-                        self.pred_dg[succ_nn][copied.name] = new_edge_attr
-                        # self.pred_dg = reverse_edges2(self.succ_dg)
 
+            for succ_nn in grab_succ_nodes:
+                succ_edge = succ_nodes[succ_nn].edge
+                succ_edge.source = copied
+                # If don't _update_graph(), update partial information:
+                # 1. Remove the original connection & add the copied node & the edge
+                # with the modified target to succ_dg.
+                # 2. Update the out-degree of copied node = len(grab).
+                # 3. The in-degree of successors keep the same.
+                # ... -> A -> 1/2/3
+                # ---
+                # ... -> A -> 1
+                # ... -> A'-> 2/3
                 if not update:
-                    self.degree_of_nodes[node.name].out_degree -= len(grab_succ_nodes)
-                    self.degree_of_nodes[copied.name].out_degree = len(grab_succ_nodes)
+                    self.succ_dg[node.name].pop(succ_nn)
+                    _orig_edge_attr = self.pred_dg[succ_nn].pop(node.name)
+                    new_edge_attr = EdgeAttr(succ_edge, _orig_edge_attr.distance)
+                    self.succ_dg[copied.name] = {succ_nn: new_edge_attr}
+                    self.pred_dg[succ_nn][copied.name] = new_edge_attr
+                    # self.pred_dg = reverse_edges2(self.succ_dg)
+
+            if not update:
+                self.degree_of_nodes[node.name].out_degree -= len(grab_succ_nodes)
+                self.degree_of_nodes[copied.name].out_degree = len(grab_succ_nodes)
 
         if update:
             self._update_graph()
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 91f7ed70..74626759 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import sys
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -38,7 +39,7 @@
     "AxonSegment",
     "CoreAbstract",
     "SuccGroup",
-    "RouteGroup",
+    "MergedSuccGroup",
 ]
 
 NodeName: TypeAlias = str
@@ -107,34 +108,48 @@ class PartitionedEdges(NamedTuple):
 NeuSlice: TypeAlias = slice
 
 
+@dataclass(frozen=True)
 class SuccGroup:
-    # edge for input to nodes[i] is edges[i]
-    def __init__(self, nodes: list[NodeType], edges: list[EdgeType], input: NodeType):
-        self.nodes = nodes
-        self.edges = edges
-        self.input = input
+    """A node and all its successor nodes & edges are grouped into a `SuccGroup`."""
 
+    input: NodeType
+    nodes: list[NodeType]
+    edges: list[EdgeType]  # len(edges) == len(nodes)
 
-class RouteGroup:
-    def __init__(self):
-        self.groups: list[SuccGroup] = list()
+    def __eq__(self, other: "SuccGroup") -> bool:
+        return self.input == other.input
+
+    def __hash__(self) -> int:
+        return hash(self.input)
+
+
+class MergedSuccGroup:
+    """SuccGroups with intersecting nodes will be merged into a `MergedSuccGroup`."""
+
+    def __init__(self, *init_sgrp: SuccGroup) -> None:
         self.nodes: set[NodeType] = set()
-        self.inputs: dict[NodeType, list[EdgeType]] = dict()
+        self.groups: list[SuccGroup] = list()
+
+        if init_sgrp:
+            for sgrp in init_sgrp:
+                self.add_group(sgrp)
 
-    def add_group(self, group: SuccGroup):
+    def add_group(self, group: SuccGroup) -> None:
         self.groups.append(group)
         self.nodes.update(group.nodes)
 
-    def set_inputs(self):
+    @property
+    def outputs(self) -> dict[NodeType, list[EdgeType]]:
+        onodes = defaultdict(list)
         for group in self.groups:
             for node, edge in zip(group.nodes, group.edges):
-                if node not in self.inputs.keys():
-                    self.inputs[node] = list()
                 assert edge.dest.name == node.name
-                self.inputs[node].append(edge)
+                onodes[node].append(edge)
+
+        return onodes
 
-    def dump(self):
-        print("RouteGroup:")
+    def dump(self) -> None:
+        print("MergedSuccGroup:")
         for group in self.groups:
             print(f"\tGroup: of {group.input.name}")
             for node, edge in zip(group.nodes, group.edges):

From 66b8fcb7610de67009a51ab236de3d462ee3214c Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 28 Oct 2024 10:14:42 +0800
Subject: [PATCH 108/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20remove?=
 =?UTF-8?q?d=20old=20routing=20classes,=20simplified=20`RoutingManager`,?=
 =?UTF-8?q?=20and=20renamed=20`RoutingRoot`=20to=20`RoutingManager`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/routing.py     | 961 +++++++---------------------------
 tests/backend/conftest.py     |  35 +-
 tests/backend/test_routing.py | 475 ++---------------
 3 files changed, 240 insertions(+), 1231 deletions(-)

diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 127c4831..f2084741 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -1,26 +1,35 @@
 import itertools
 import math
-from collections.abc import Generator, Iterator, Sequence
-from typing import Any, Optional, Union, final
+import sys
+from collections.abc import Generator, Iterator
+from typing import Any, ClassVar, Union
 
 from paicorelib import ROUTING_DIRECTIONS_IDX as DIREC_IDX
-from paicorelib import ChipCoord, Coord, HwConfig, RoutingCoord, RoutingCost
+from paicorelib import ChipCoord, Coord, HwConfig, RoutingCoord
 from paicorelib import RoutingDirection as Direction
 from paicorelib import RoutingLevel as Level
-from paicorelib import RoutingStatus as Status
-from paicorelib import get_routing_consumption
 from paicorelib.routing_defs import MAX_ROUTING_PATH_LENGTH
 
-from paibox.exceptions import GraphBuildError, ResourceError, RoutingError
+from paibox.exceptions import (
+    ResourceError,
+    RoutingError,
+    GraphBuildError,
+    PAIBoxDeprecationWarning,
+)
 
 from .conf_types import CorePlmConfInChip
-from .placement import CoreBlock, CorePlacement, EmptyCorePlacement
+from .placement import CoreBlock, EmptyCorePlacement
 from .types import *
 
-__all__ = ["RoutingGroup", "RoutingRoot"]
+if sys.version_info >= (3, 13):
+    from typing import deprecated
+else:
+    from typing_extensions import deprecated
 
+__all__ = ["RoutingGroup", "RoutingManager"]
 
-def Coord2RoutingCoord(coord: Coord) -> RoutingCoord:
+
+def _Coord2RoutingCoord(coord: Coord) -> RoutingCoord:
     directions: list[Direction] = []
     x = coord.x
     y = coord.y
@@ -28,630 +37,187 @@ def Coord2RoutingCoord(coord: Coord) -> RoutingCoord:
     for i in range(MAX_ROUTING_PATH_LENGTH):
         # 每个循环，提取最高位（移动了 4-i 位）到最低位，恢复 value_x 和 value_y
         shift = 4 - i
-        value_x = (x >> shift) & 0b1  # 取出当前位的值
-        value_y = (y >> shift) & 0b1
+        value_x, value_y = (x >> shift) & 0b1, (y >> shift) & 0b1
         directions.append(Direction((value_x, value_y)))
-    return RoutingCoord(*directions)
-
-
-class RoutingCluster:
-    def __init__(
-        self,
-        level: Level,
-        direction: Direction = Direction.ANY,
-        *,
-        data: Optional[CorePlacement] = None,
-        status: Optional[Status] = None,
-        tag: Optional[str] = None,
-        include_online: bool = False,
-        parent: Optional["RoutingCluster"] = None,
-    ) -> None:
-        """Instance a tree cluster with `level` and `direction`.
-        - For a Lx(>0)-level cluster, after created, the length of children is `node_capacity`.
-        - For a L0-level cluster, it's a leaf.
-
-        Args:
-            - level: the cluster level.
-            - data: the data hanging on the cluster. Optional.
-            - direction: the direction of the cluster itself. Default is `Direction.ANY`.
-            - tag: a tag for user to identify. Optional.
-
-        Attributes:
-            - level: the cluster level.
-            - children: the children of the cluster.
-            - d: the direction of the cluster, relative to its parent.
-            - item: the data hanging on the cluster.
-            - tag: a tag for user to identify.
-            - status: the status of the cluster. Only for L0-level leaves.
-        """
-        self.level = level
-        self.children: dict[Direction, RoutingCluster] = dict()
-        self.d = direction
-        self.item = data
-        self.tag = tag
-        self.include_online = include_online
-        self.parent = parent
-
-        # Only set the attribute for L0-level cluster.
-        if self.level == Level.L0:
-            setattr(self, "status", status)
-
-    def clear(self) -> None:
-        """Clear the tree."""
-
-        def dfs(root: RoutingCluster) -> None:
-            root.children.clear()
-            if root.level == Level.L1:
-                return
-
-            for child in root.children.values():
-                dfs(child)
-
-        if self.level > Level.L0:
-            dfs(self)
-
-    def create_child(self, **kwargs) -> Optional["RoutingCluster"]:
-        """Create a child. If full, return None."""
-        child = RoutingCluster(Level(self.level - 1), **kwargs)
-
-        if not self.add_child(child):
-            return None
-
-        return child
-
-    def add_child(
-        self, child: "RoutingCluster", check_hit_online: bool = False
-    ) -> bool:
-        if self.level == Level.L0:
-            # L0-level cluster cannot add child.
-            raise AttributeError("L0-level cluster cannot add child.")
-
-        if self.is_full():
-            return False
-
-        for d in DIREC_IDX:
-            if d not in self:
-                return self.add_child_to(child, d, check_hit_online)
-
-        return False
-
-    def add_child_to(
-        self, child: "RoutingCluster", d: Direction, check_hit_online: bool = False
-    ) -> bool:
-        """Add a child cluster to a certain `direction`."""
-        if self.level - child.level != 1:
-            raise ValueError("cannot skip more than 1 level.")
-
-        if d in self:
-            return False
-
-        if d == Direction.X1Y1 or self.level in (Level.L1, Level.L2):
-            if self.include_online and check_hit_online:
-                return False
-            else:
-                child.include_online = self.include_online
-
-        # child.direction = d. Already done in `self[d]`(__setitem__).
-        self[d] = child
-        child.parent = self
-
-        return True
-
-    def remove_child(
-        self,
-        d: Direction,
-        revert_direc: Direction = Direction.ANY,
-        strict: bool = False,
-    ) -> Optional["RoutingCluster"]:
-        child = self.children.pop(d, None)
-
-        if child is None:
-            if strict:
-                raise RoutingError(f"removed child of {d} from {self} failed.")
-            else:
-                return None
-
-        # Revert the properties that were modified in the previous insertion.
-        child.include_online = False
-        child.d = revert_direc
-
-        return child
-
-    def find_cluster_by_path(
-        self, path: Sequence[Direction]
-    ) -> Optional["RoutingCluster"]:
-        """Find the cluster by given a routing path.
-
-        Description:
-            Find by starting at this level based on the routing path provided.
-            Take `path[0]` each time and do a recursive search.
-        """
-        if len(path) == 0:
-            return self
-
-        if len(path) > self.level:
-            raise ValueError(
-                f"the length of path must be no more than {self.level}, but got {len(path)}."
-            )
-
-        if path[0] not in self:
-            return None
-
-        sub_cluster = self[path[0]]
-
-        if len(path) > 1:
-            return sub_cluster.find_cluster_by_path(path[1:])
-        else:
-            return sub_cluster
-
-    def get_routing_path(self, cluster: "RoutingCluster") -> Optional[list[Direction]]:
-        """Return a direction path from L4 to the level of `cluster`.
-
-        Args:
-            - cluster: the cluster with level <= `self.level`.
-
-        Return:
-            - A list of `Direction` from L4 to L0.
-        """
-        if cluster.level > self.level:
-            raise ValueError(
-                "cannot get routing path because the level cluster is higher."
-            )
-
-        if cluster.level == self.level:
-            if cluster != self:
-                return None
-
-            return []
-
-        path = []
-
-        def dfs(root: RoutingCluster) -> bool:
-            for d, child in root.children.items():
-                path.append(d)
-
-                if child is cluster:
-                    return True
-                elif dfs(child):
-                    return True
-                else:
-                    path.pop(-1)
-
-            return False
-
-        if dfs(self):
-            return path
-
-    def is_full(self) -> bool:
-        return len(self.children) == self.node_capacity
-
-    def is_empty(self) -> bool:
-        return len(self.children) == 0
-
-    def n_child_avail(self) -> int:
-        return self.node_capacity - len(self.children)
-
-    def _find_lx_cluster_with_n_child_avail(
-        self, lx: Level, n_child_avail: int, method: str = "nearest"
-    ) -> Optional["RoutingCluster"]:
-        """Find the child of level `lx` with at least `n_child_avail` child available."""
-        if lx > self.level:
-            raise ValueError
-
-        if lx == self.level:
-            if self.n_child_avail() >= n_child_avail:
-                return self
-            else:
-                return None
-
-        if not self.is_empty():
-            for d in DIREC_IDX:
-                if d in self:
-                    cluster = self[d]._find_lx_cluster_with_n_child_avail(
-                        lx, n_child_avail, method
-                    )
-                    if cluster is not None:
-                        return cluster
-
-        child = self.create_child()
-        if not child:
-            return None
-
-        return child._find_lx_cluster_with_n_child_avail(lx, n_child_avail, method)
-
-    def add_subtree(
-        self,
-        subtree: "RoutingCluster",
-        check_hit_online: bool,
-    ) -> bool:
-        """Add the subtree's children to itself. If successful, return the added parent cluster."""
-        if subtree.level > self.level:
-            raise ValueError(
-                f"subtree's level {subtree.level} must be no more than the current level {self.level}."
-            )
-
-        if subtree.level == self.level:
-            sub_n_child = len(subtree.children)
-            if self.n_child_avail() < sub_n_child:
-                return False
-
-            if sub_n_child == 1:
-                return self.add_child(subtree[Direction.X0Y0], check_hit_online)
-
-            elif sub_n_child == 2:
-                # len(self.children) == 0, place in [0,1]
-                # len(self.children) == 1, place in [2,3]
-                if len(self.children) == 0:
-                    _place_idx = (0, 1)
-                else:  # 2 & 3
-                    _place_idx = (2, 3)
-
-                hit_online = False
-
-                for i in range(sub_n_child):
-                    success = self.add_child_to(
-                        subtree[DIREC_IDX[i]],
-                        DIREC_IDX[_place_idx[i]],
-                        check_hit_online,
-                    )
-                    hit_online |= not success
-
-                if hit_online:
-                    # If any of the subtrees fail to insert, the inserted subtrees are removed.
-                    for i in range(sub_n_child):
-                        removed = self.remove_child(
-                            DIREC_IDX[_place_idx[i]], DIREC_IDX[i], strict=False
-                        )
-                        subtree[DIREC_IDX[i]].parent = subtree
-
-                    return False
-
-            elif sub_n_child == 4:
-                if self.include_online and check_hit_online:
-                    return False
-
-                self.children = subtree.children
-                # Because the tree is inserted using depth-first order, when a node is
-                # encountered with no child, it must be on the far right.
-                self[Direction.X1Y1].include_online = True
-                for child in self.children.values():
-                    child.parent = self
 
-            else:
-                raise ValueError(f"the number of {sub_n_child} child is invalid.")
-
-            return True
-
-        # subtree.level < self.level
-        if not self.is_empty():
-            for d in DIREC_IDX:
-                if d in self:
-                    flag = self[d].add_subtree(subtree, check_hit_online)
-                    if flag:
-                        return True
-
-        child = self.create_child()
-        if not child:
-            return False
-
-        return child.add_subtree(subtree, check_hit_online)
-
-    @classmethod
-    def create_lx_full_tree(
-        cls,
-        lx: Level,
-        d: Direction = Direction.X0Y0,
-        root_tag: Optional[str] = None,
-    ) -> "RoutingCluster":
-        root = RoutingCluster(lx, d, tag=root_tag)
-
-        if lx > Level.L1:
-            for i in range(root.node_capacity):
-                child = cls.create_lx_full_tree(
-                    Level(lx - 1), DIREC_IDX[i], f"L{lx-1}_{i}"
-                )
-                if not root.add_child(child):
-                    raise ValueError
-
-        return root
-
-    @classmethod
-    def create_routing_tree(cls, lx: Level, n_branch: int) -> "RoutingCluster":
-        """Create a routing tree with `n_branch` child.
-
-        NOTE: When lx == L1, do not create the L0-level child. \
-            WHen lx > L1, create the lx-1 level child.
-        """
-        if n_branch < 0 or n_branch > HwConfig.N_SUB_ROUTING_NODE:
-            raise ValueError(f"#N of branches out of range, got {n_branch}.")
-
-        if lx == Level.L0:
-            raise ValueError("do not create L0-level node directly.")
-
-        root = RoutingCluster(lx, Direction.X0Y0)
-
-        # Create `n_branch` children when lx > L1.
-        if lx > Level.L1:
-            for i in range(n_branch):
-                child = cls.create_lx_full_tree(Level(lx - 1), DIREC_IDX[i])
-                if not root.add_child(child):
-                    raise ValueError(f"add child {child} failed.")
-
-        return root
-
-    def add_L0_for_placing(self, data: Any = None, **kwargs) -> "RoutingCluster":
-        """Add L0 cluster for placing in the routing tree.
-
-        Args:
-            - data: the data attached to the L0-level cluster.
-            - kwargs: other arguments of the L0-level cluster, status, tag, etc.
-        """
-        cluster = RoutingCluster(Level.L0, data=data, **kwargs)
-
-        L1_cluster = self._find_lx_cluster_with_n_child_avail(Level.L1, 1)
-        if not L1_cluster:
-            raise RoutingError("available L1 cluster not found.")
-
-        if not L1_cluster.add_child(cluster):
-            raise RoutingError("add child to L1 cluster failed.")
-
-        return cluster
-
-    def find_lx_clusters(
-        self, lx: Level, n_child_avail_low: int = 0
-    ) -> list["RoutingCluster"]:
-        """Find all clusters at a `lx` level with at least `n_child_avail_low` child clusters."""
-        if lx > self.level:
-            return []
-
-        clusters = []
-
-        def dfs_preorder(root: RoutingCluster) -> None:
-            if root.level == lx:
-                if root.n_child_avail() >= n_child_avail_low:
-                    clusters.append(root)
-
-                return None
-
-            for d in DIREC_IDX:
-                if d in root:
-                    dfs_preorder(root[d])
-
-        dfs_preorder(self)
-        return clusters
-
-    def find_leaf_at_level(self, lx: Level) -> list["RoutingCluster"]:
-        """Find clusters with no child at the `lx` level."""
-        if lx == Level.L0:
-            return []
-
-        return self.find_lx_clusters(lx, self.node_capacity)
-
-    def breadth_of_lx(self, lx: Level) -> int:
-        """Get the number of clusters in the routing tree at the given level."""
-        clusters = self.find_lx_clusters(lx, 0)
-
-        return len(clusters)
-
-    def __getitem__(self, d: Direction) -> "RoutingCluster":
-        return self.children[d]
-
-    def __setitem__(self, d: Direction, child: "RoutingCluster") -> None:
-        self.children[d] = child
-        child.d = d  # Set the direction of the child.
-
-    def __str__(self) -> str:
-        _name = id(self) if self.tag is None else self.tag
-        return f"tree {_name} at {self.d.name} at level {self.level}"
+    return RoutingCoord(*directions)
 
-    def __iter__(self) -> Iterator[Direction]:
-        return self.children.__iter__()
 
-    def __contains__(self, d: Direction) -> bool:
-        return d in self.children
+class RoutingGroup:
+    """Each routing group should be able to route by single coord."""
 
-    @property
-    def node_capacity(self) -> int:
-        return HwConfig.N_SUB_ROUTING_NODE if self.level > Level.L0 else 0
+    _debug_id: ClassVar[int] = 0
+    """Class counter for debugging."""
 
-    @property
-    def routing_coord(self) -> RoutingCoord:
-        cur_cluster = self
-        path = [self.d]
+    def __init__(
+        self, unordered_cb: list[CoreBlock], ordered_rgrp: list["RoutingGroup"]
+    ) -> None:
+        self.unordered_cb: list[CoreBlock] = unordered_cb
+        self.ordered_rgrp: list["RoutingGroup"] = ordered_rgrp
+        self.routing_elems: list[Union[CoreBlock, "RoutingGroup"]] = (
+            unordered_cb + ordered_rgrp
+        )
+        self.offset: list[int] = [] # TODO Change a name
+        self.n_core_required: int = 0
+        """The actual number of cores required by the routing group."""
+        self.n_tail_waste: int = 0
+        """Waste cores at the tail of the routing group."""
 
-        while cur_cluster.parent is not None:
-            path.append(cur_cluster.parent.d)
-            cur_cluster = cur_cluster.parent
+        axons: set[SourceNodeType] = set()
+        for elem in self.routing_elems:
+            axons.update(elem.axons)
 
-        path = path[:-1]
+        self.axons: list[SourceNodeType] = list(axons)  # unordered
 
-        for _ in range(cur_cluster.level, Level.L5):
-            path.append(Direction.X0Y0)
+        self.assigned_coords: list[Coord] = []
+        """Assigned core coordinates in the routing group"""
+        self.wasted_coords: list[Coord] = []
+        """Wasted core coordinates in routing group"""
+        self.wasted_core_plm: dict[Coord, EmptyCorePlacement] = {}
+        """Wasted core placements"""
 
-        for _ in range(self.level):
-            path.insert(0, Direction.ANY)
+        """Status options"""
+        self.is_assigned = False
+        """Whether the coordinates of chip & cores are assigned."""
 
-        return RoutingCoord(*reversed(path))
+        # For debugging
+        self._id = RoutingGroup._debug_id
+        RoutingGroup._debug_id += 1
 
+    def set_core_required(self) -> None:
+        """Calculate the number of cores required for the routing group iteratively."""
+        for rgrp in self.ordered_rgrp:
+            rgrp.set_core_required()
 
-# each sub routing group should be able to route by single coord
-class SubRoutingGroup:
-    index = 0
+        # Record the used cores of the members, but not the actual amount.
+        n_core_used = 0
 
-    def __init__(
-        self,
-        unorder_elements: list[Union[CoreBlock, "SubRoutingGroup"]],
-        ordered_elements: list["SubRoutingGroup"],
-    ) -> None:
-        self.unorder_elements: list[Union[CoreBlock, "SubRoutingGroup"]] = (
-            unorder_elements
-        )
-        self.ordered_elements: list["SubRoutingGroup"] = ordered_elements
-        self.routing_elements: list[Union[CoreBlock, "SubRoutingGroup"]] = (
-            unorder_elements + ordered_elements
-        )
-        self.offset: list[int] = list()
-        self.n_core_required: int = 0
-        self.tail_wasted: int = 0
-        self.name = f"SubRoutingGroup[{SubRoutingGroup.index}]"
-        axons: set[SourceNodeType] = set()
-        for element in self.routing_elements:
-            axons.update(element.axons)
-        self.axons: list[SourceNodeType] = list(axons)
-        SubRoutingGroup.index += 1
-
-    def set_config(self):
-        for element in self.routing_elements:
-            if isinstance(element, SubRoutingGroup):
-                element.set_config()
-
-        # unorder elements sorted from big to small, avoiding assigning waste.
-        unorder_elements = sorted(
-            self.unorder_elements, key=lambda x: x.n_core_required, reverse=True
+        # Unordered core blocks sorted in descending order, avoiding assigning waste.
+        unordered_cb = sorted(
+            self.unordered_cb, key=lambda x: x.n_core_required, reverse=True
         )
-        ordered_elements = self.ordered_elements
-        for element in unorder_elements:
-            n_core_required = element.n_core_required
+        for cb in unordered_cb:
             self.offset.append(self.n_core_required)
-            self.n_core_required += n_core_required
+            n_core_used += cb.n_core_required
 
-        # ordered elements should be assgined first
-        for element in ordered_elements:
-            n_core_required = element.n_core_required
-            n_core_assigned = _nearest_multiple_above(
-                self.n_core_required, n_core_required
-            )
+        # Ordered routing groups should be assgined first.
+        ordered_rgrp = self.ordered_rgrp
+        for rgrp in ordered_rgrp:
+            n_core_assigned = _nearest_multiple_above(n_core_used, rgrp.n_core_required)
             self.offset.append(n_core_assigned)
-            self.n_core_required = n_core_assigned + n_core_required
-
-        # routing elements should satisfy topological order
-        self.routing_elements: list[Union[CoreBlock, "SubRoutingGroup"]] = (
-            unorder_elements + ordered_elements
-        )
-
-        sub_tail_wasted = (
-            0
-            if isinstance(self.routing_elements[-1], CoreBlock)
-            else self.routing_elements[-1].tail_wasted
-        )
-        assigned_n_core_required = 1 << (self.n_core_required - 1).bit_length()
-        self.tail_wasted += (
-            assigned_n_core_required - self.n_core_required + sub_tail_wasted
-        )
-        self.n_core_required = assigned_n_core_required
-
-    # return Coord that wasted in subrouting group
-    def assign(
-        self, allocated: list[Coord], chip_coord: Coord
+            n_core_used = n_core_assigned + rgrp.n_core_required
+
+        # Routing elements need satisfy the topological order
+        self.routing_elems = unordered_cb + ordered_rgrp
+
+        # If there are ordered routing groups, the final amount wasted is the
+        # tail waste number of the LAST routing group. Otherwise, waste = 0.
+        n_tail_waste = ordered_rgrp[-1].n_tail_waste if ordered_rgrp else 0
+        # sub_tail_wasted = (
+        #     0
+        #     if isinstance(self.routing_elems[-1], CoreBlock)
+        #     else self.routing_elems[-1].n_tail_waste
+        # )
+
+        # This is the amount of cores required actually.
+        self.n_core_required = 1 << (n_core_used - 1).bit_length()
+        self.n_tail_waste = self.n_core_required - n_core_used + n_tail_waste
+
+    def assign_coord(
+        self, chip_coord: Coord, allocated: list[Coord]
     ) -> tuple[list[Coord], list[Coord]]:
+        """Assign core coordinates to the routing group."""
         cur_i = 0
         assigned_coords: list[Coord] = []
         wasted_coords: list[Coord] = []
-        for element, offset in zip(self.routing_elements, self.offset):
+
+        for elem, offset in zip(self.routing_elems, self.offset):
             if offset > cur_i:
-                wasted_coords = wasted_coords + allocated[cur_i:offset]
-            cur_i = offset
+                wasted_coords += allocated[cur_i:offset]
 
-            n = element.n_core_required
-            print(
-                f"element: {element.name}, {n} cores, start at {Coord2RoutingCoord(allocated[cur_i])}"
+            cur_i = offset
+            n = elem.n_core_required
+            # print(
+            #     f"element: {elem}, {n} cores, start at {_Coord2RoutingCoord(allocated[cur_i])}"
+            # )
+            assigned, wasted = elem.assign_coord(
+                chip_coord, allocated[cur_i : cur_i + n]
             )
-            assigned, wasted = element.assign(allocated[cur_i : cur_i + n], chip_coord)
-            assigned_coords = assigned_coords + assigned
-            wasted_coords = wasted_coords + wasted
+            assigned_coords += assigned
+            wasted_coords += wasted
             cur_i += n
-        return assigned_coords, wasted_coords + allocated[cur_i:]
 
-    # use list to keep the order of axons
-    def group_axons(self, multicast_axons: list[SourceNodeType]) -> None:
+        self.assigned_coords = assigned_coords
+        self.wasted_coords = wasted_coords + allocated[cur_i:]
+        self.is_assigned = True
+
+        return self.assigned_coords, self.wasted_coords
+
+    def group_axons(self, multicast_axons: list[SourceNodeType] = []) -> None:
+        """Group the axons, using list to keep the order of axons."""
+        if not all(cb._lcn_locked for cb in self.core_blocks):
+            raise GraphBuildError(
+                "get axon segments of core block after 'lcn_ex' is locked."
+            )
+
         private_multicast_axons = multicast_axons.copy()
-        axons_count: list[int] = [0] * len(self.axons)
-        for element in self.routing_elements:
-            for axon in element.axons:
-                idx = self.axons.index(axon)
-                axons_count[idx] += 1
-        for i, axon in enumerate(self.axons):
-            if axons_count[i] > 1 and axon not in private_multicast_axons:
-                private_multicast_axons.append(axon)
-
-        for element in self.routing_elements:
-            element.group_axons(private_multicast_axons)
+        ax_shared_times: list[int] = [0] * len(self.axons)
+
+        # Axons shared within a routing group also need to be multicast.
+        for elem in self.routing_elems:
+            for ax in elem.axons:
+                idx = self.axons.index(ax)
+                ax_shared_times[idx] += 1
+
+        for ax, times in zip(self.axons, ax_shared_times):
+            if times > 1 and ax not in private_multicast_axons:
+                private_multicast_axons.append(ax)
+
+        for elem in self.routing_elems:
+            elem.group_axons(private_multicast_axons)
 
     @property
     def core_blocks(self) -> list[CoreBlock]:
+        """Retrieve all core blocks within the routing group recursively."""
         cbs = []
-        for element in self.routing_elements:
-            if isinstance(element, CoreBlock):
-                cbs.append(element)
+
+        for elem in self.routing_elems:
+            if isinstance(elem, CoreBlock):
+                cbs.append(elem)
             else:
-                cbs += element.core_blocks
+                cbs += elem.core_blocks
+
         return cbs
 
     @classmethod
-    def build(cls, route_group: RouteGroup) -> "SubRoutingGroup":
-
-        if len(route_group.nodes) == 0:
-            return None
-        sub_group = RouteGroup()
-        remaining_group = RouteGroup()
-        for group in route_group.groups:
-            if group.input in route_group.nodes:
-                sub_group.add_group(group)
-            else:
-                remaining_group.add_group(group)
-
-        remaining_group.nodes = remaining_group.nodes - sub_group.nodes
-        unorder_elements: list[CoreBlock] = CoreBlock.build_core_blocks(remaining_group)
-        ordered_elements: list[SubRoutingGroup] = []
-        sub_routing_group: SubRoutingGroup = SubRoutingGroup.build(sub_group)
-        if sub_routing_group is not None:
-            ordered_elements = [sub_routing_group]
-        return cls(unorder_elements, ordered_elements)
+    def build(cls, merged_sgrp: MergedSuccGroup) -> "RoutingGroup":
+        msgrp = MergedSuccGroup()
+        remaining = MergedSuccGroup()
 
-    def dump(self, i: int = 0):
-        tabs = "\t" * i
-        print(f"{tabs}SubRoutingGroup: {self.name} with {self.n_core_required} cores:")
-        for element in self.routing_elements:
-            if isinstance(element, SubRoutingGroup):
-                element.dump(i + 1)
+        for group in merged_sgrp.groups:
+            if group.input in merged_sgrp.nodes:
+                msgrp.add_group(group)
             else:
-                print(f"{tabs}\t{element.name} with {element.n_core_required} cores:")
-                for edge in element._parents:
-                    print(
-                        f"{tabs}\t\t{edge.name}: {edge.source.name} -> {edge.target.name}"
-                    )
+                remaining.add_group(group)
 
+        remaining.nodes -= msgrp.nodes
+        unordered_cb = CoreBlock.build_core_blocks(remaining)
 
-class RoutingGroup:
-    """Core blocks located within a routing group are routable.
-
-    NOTE: Axon groups within a routing group are the same.
-    """
-
-    def __init__(self, route_group: RouteGroup) -> None:
-        self.sub_routing_group: SubRoutingGroup = SubRoutingGroup.build(route_group)
-        self.core_blocks = self.sub_routing_group.core_blocks
-        self.assigned_coords: list[Coord] = []
-        """Assigned core coordinates in the routing group"""
-        self.wasted_coords: list[Coord] = []
-        """Wasted core coordinates in routing group"""
-        self.wasted_core_plm: dict[Coord, EmptyCorePlacement] = {}
-        """Wasted core placements"""
-        self.sub_n_core_wasted = 0
+        if len(msgrp.nodes) > 0:
+            sub_rgrp = RoutingGroup.build(msgrp)
+            ordered_rgrp = [sub_rgrp]
+        else:
+            ordered_rgrp = []
 
-    def assign(self, allocated: list[Coord], chip_coord: Coord) -> None:
-        print(
-            f"route_group: {self.sub_routing_group.name} assigned from {Coord2RoutingCoord(allocated[0])}"
-        )
-        assigned, wasted = self.sub_routing_group.assign(allocated, chip_coord)
-        self.assigned_coords = assigned
-        self.wasted_coords = wasted
+        return cls(unordered_cb, ordered_rgrp)
 
     def core_block_alloc(self) -> None:
+        assert self.is_assigned, "coordinates are not assigned."
+
         for cb in self:
             cb.core_plm_alloc()
 
-        # Allocate blank core placements for the wasted coordinates.
+        # Allocate empty core placements for the wasted coordinates.
         for coord in self.wasted_coords:
             self.wasted_core_plm[coord] = EmptyCorePlacement.build(coord)
 
@@ -663,29 +229,9 @@ def get_wasted_cplm_config(self) -> CorePlmConfInChip:
 
     def get_n_core_occupied(self) -> int:
         """Get the #N of cores occupied by the routing group."""
+        assert self.is_assigned, "coordinates are not assigned."
         return len(self.assigned_coords) + len(self.wasted_coords)
 
-    @property
-    def n_core_required(self) -> int:
-        """The actual number of cores required by the routing group."""
-        return sum(cb.n_core_required for cb in self)
-
-    @property
-    def n_core_cost(self) -> int:
-        return self.sub_routing_group.n_core_required
-
-    @property
-    def tail_wasted(self) -> int:
-        return self.sub_routing_group.tail_wasted
-
-    @property
-    def routing_cost(self) -> RoutingCost:
-        return get_routing_consumption(self.n_core_required)
-
-    @property
-    def routing_level(self) -> Level:
-        return self.routing_cost.get_routing_level()
-
     @property
     def chip_coord(self) -> ChipCoord:
         if not all(cb.chip_coord == self[0].chip_coord for cb in self):
@@ -695,6 +241,19 @@ def chip_coord(self) -> ChipCoord:
 
         return self[0].chip_coord
 
+    def dump(self, i: int = 0) -> None:
+        tabs = "\t" * i
+        print(f"{tabs}RoutingGroup: {self} with {self.n_core_required} cores:")
+        for elem in self.routing_elems:
+            if isinstance(elem, RoutingGroup):
+                elem.dump(i + 1)
+            else:
+                print(f"{tabs}\t{elem.name} with {elem.n_core_required} cores:")
+                for edge in elem._parents:
+                    print(
+                        f"{tabs}\t\t{edge.name}: {edge.source.name} -> {edge.target.name}"
+                    )
+
     def __contains__(self, cb: CoreBlock) -> bool:
         return cb in self.core_blocks
 
@@ -704,41 +263,23 @@ def __getitem__(self, idx: int) -> CoreBlock:
     def __iter__(self) -> Iterator[CoreBlock]:
         return self.core_blocks.__iter__()
 
-    def group_axons(self) -> None:
-        for cb in self.core_blocks:
-            if not cb._lcn_locked:
-                raise GraphBuildError("get axon segments after 'lcn_ex' is locked.")
-        self.sub_routing_group.group_axons([])
+    def __str__(self) -> str:
+        return f"RoutingGroup_{self._id}"
 
 
-@final
-class RoutingRoot:
+class RoutingManager:
     def __init__(self, chip_list: list[ChipCoord], **kwargs) -> None:
         """Initialize a routing quadtree root."""
         self.chip_list: list[ChipCoord] = chip_list
-        # Every L5 routing cluster is unique in each chip root.
-        self.chip_roots = [
-            RoutingCluster(Level.L5, include_online=True) for _ in range(len(chip_list))
-        ]
         self.used_L2_clusters: list[list[RoutingCoord]] = [
             list() for _ in range(len(chip_list))
         ]
-        """Used L2 clusters in each chip root. The clocks of unused L2 clusters can be turned off   \
+        """Used L2 clusters in each chip. The clocks of unused L2 clusters can be turned off   \
             through the serial port to reduce power consumption.
         """
         self.n_core_total: int = 0
         self.n_core_per_chip: list[int] = [0] * len(chip_list)
 
-    def get_leaf_coord(
-        self, root: RoutingCluster, leaf: RoutingCluster
-    ) -> RoutingCoord:
-        """Return the routing coordinate of the L0 leaf."""
-        path = root.get_routing_path(leaf)
-        if path:
-            return RoutingCoord(*path)
-
-        raise RoutingError(f"get leaf {leaf.tag} coordinate failed.")
-
     def get_insert_location(
         self, n_core_incoming: int, n_core_wasted: int
     ) -> tuple[int, int, list[Direction]]:
@@ -776,19 +317,21 @@ def get_insert_location(
 
         return core_loc, chip_idx_loc, routing_path
 
-    def place_routing_group(self, routing_group: RoutingGroup) -> None:
+    def place_routing_group(
+        self, rgrp: RoutingGroup
+    ) -> tuple[list[Coord], list[Coord]]:
         """Place a routing group in the chip list. Assign each core blocks with routing coordinates &   \
             make sure they are routable.
+        
+        Returns: a tuple of lists of assigned and wasted coordinates.
         """
-        print("Routing Group:")
-        for cb in routing_group:
-            print(f"\t{cb.name}")
-
-        n_core_cost = routing_group.n_core_cost
-        tail_wasted = routing_group.tail_wasted
-        n_core_req = n_core_cost - tail_wasted
-        print(f"\tcost: {n_core_cost}, tail_wasted: {tail_wasted}")
+        # for cb in rgrp:
+        #     print(f"\t{cb.name}")
+        n_core_cost = rgrp.n_core_required
+        n_tail_waste = rgrp.n_tail_waste
+        n_core_req = n_core_cost - n_tail_waste
 
+        # Check that the cores required by the routing group do not exceed the hardware limit.
         if n_core_req > HwConfig.N_CORE_OFFLINE:
             raise ResourceError(
                 "the number of cores required by the routing group exceeds the hardware limit, "
@@ -796,147 +339,29 @@ def place_routing_group(self, routing_group: RoutingGroup) -> None:
             )
 
         core_insert_loc, chip_idx_loc, rpath_start = self.get_insert_location(
-            n_core_cost, tail_wasted
+            n_core_cost, n_tail_waste
         )
-        allocated_coords: list[Coord] = []
 
+        allocated_coords: list[Coord] = []
         for i, rpath in _routing_path_generator(n_core_cost, rpath_start):
             leaf_coord = RoutingCoord(*reversed(rpath))
             # Record the used L2 clusters
             if (core_insert_loc + i) % (HwConfig.N_SUB_ROUTING_NODE**Level.L2) == 0:
                 L2_coord = RoutingCoord(*reversed(rpath[Level.L2 :]))
                 self.used_L2_clusters[chip_idx_loc].append(L2_coord)
-            allocated_coords.append(leaf_coord.to_coord())
-
-        routing_group.assign(allocated_coords, self.chip_list[chip_idx_loc])
-        print()
-
-    def insert_routing_group(self, routing_group: RoutingGroup) -> bool:
-        """Insert a `RoutingGroup` in the routing tree. Assign each core blocks with \
-            routing coordinates & make sure they are routable.
-
-        NOTE: Use depth-first search to insert each core block into the routing tree \
-            to ensure that no routing deadlock occurs between core blocks.
-        """
-        cost = routing_group.routing_cost
-        level = routing_group.routing_level
-        if routing_group.n_core_required > HwConfig.N_CORE_OFFLINE:
-            raise ResourceError(
-                f"the number of cores required by the routing group exceeds the hardware limit, "
-                f"{routing_group.n_core_required} > {HwConfig.N_CORE_OFFLINE}."
-            )
 
-        routing_cluster = RoutingCluster.create_routing_tree(level, cost[level - 1])
-
-        # `n_L0` physical cores will be occupied.
-        #   - For the first `n_core_required` cores, they are used for placement.
-        #   - For the rest, they are unused.
-        # Make sure the routing cluster is successfully inserted to the root
-        # then assign coordinates & status.
-        leaves = []
-        wasted = []
-
-        if cost.n_L0 > HwConfig.N_CORE_OFFLINE:
-            _max_n_l0 = HwConfig.N_CORE_OFFLINE
-        else:
-            _max_n_l0 = cost.n_L0
-
-        for i in range(routing_group.n_core_required):
-            l0 = routing_cluster.add_L0_for_placing(
-                data=f"rg_{id(routing_group)}_{i}",
-                status=Status.USED,
-                tag=f"rg_{id(routing_group)}_{i}",
-            )
-            leaves.append(l0)
-
-        for i in range(routing_group.n_core_required, _max_n_l0):
-            l0 = routing_cluster.add_L0_for_placing(
-                status=Status.OCCUPIED, tag=f"rg_{id(routing_group)}_{i}"
-            )
-            wasted.append(l0)
-
-        # If #N of wasted cores > 16, it won't hit online L2 cluster.
-        # XXX 'check_hit_online' conditions could be more precise, but
-        # there is no clear benefit to doing so at the moment.
-        check_hit_online = (
-            _max_n_l0 - routing_group.n_core_required
-        ) <= HwConfig.N_CORE_ONLINE
-
-        # Add the sub-tree to the root.
-        flag = False
-        # TODO For now, use sequential attempt.
-        for chip_coord, chip_root in zip(self.chip_list, self.chip_roots):
-            flag = chip_root.add_subtree(routing_cluster, check_hit_online)
-            if flag:
-                break
-
-        if not flag:
-            raise RoutingError(
-                f"insert routing group 0x{id(routing_group):x} into the routing tree failed, "
-                f"cannot insert to any chip."
-            )
-
-        # TODO Consider obtaining the root coord of the `routing_cluster` applied for when inserting,
-        # and calculate all leaf coords according to the size of the routing group. Instead of
-        # recording all the leaves and then looking up their coordinates in the tree.
-        valid_coords = []
-        wasted_coords = []
-        for cluster in leaves:
-            coord = self.get_leaf_coord(chip_root, cluster)
-            valid_coords.append(coord.to_coord())
-
-        for cluster in wasted:
-            coord = self.get_leaf_coord(chip_root, cluster)
-            wasted_coords.append(coord.to_coord())
-
-        routing_group.assign(valid_coords, wasted_coords, chip_coord)
-
-        return True
-
-    def clear(self) -> None:
-        for root in self:
-            root.clear()
-
-    def breadth_of_lx(self, lx: Union[Level, int], chip_idx: int = -1) -> int:
-        """Get the breadth of the given level at chip root #idx.
-
-        Args:
-            - lx: the level to find.
-            - chip_idx: the chip root index. If it is -1, return the sum of the breadth on all roots.
-        """
-        if chip_idx == -1:
-            return sum(chip_root.breadth_of_lx(Level(lx)) for chip_root in self)
-
-        return self[chip_idx].breadth_of_lx(Level(lx))
-
-    def __getitem__(self, index: int) -> RoutingCluster:
-        return self.chip_roots[index]
-
-    def __iter__(self) -> Iterator[RoutingCluster]:
-        return self.chip_roots.__iter__()
-
-
-def get_parent(
-    tree: RoutingCluster, cluster: RoutingCluster
-) -> Optional[RoutingCluster]:
-    """Get the parent cluster of the given cluster. If not found, return None."""
-    assert tree != cluster
+            allocated_coords.append(leaf_coord.to_coord())
 
-    def dfs_preorder(
-        tree: RoutingCluster, cluster: RoutingCluster
-    ) -> Optional[RoutingCluster]:
-        for d in DIREC_IDX:
-            if d in tree:
-                if tree[d] is cluster:
-                    return tree
-                else:
-                    parent = dfs_preorder(tree[d], cluster)
-                    if parent:
-                        return parent
+        return rgrp.assign_coord(self.chip_list[chip_idx_loc], allocated_coords)
 
-        return None
 
-    return dfs_preorder(tree, cluster)
+@deprecated(
+    "'RoutingRoot' is deprecated in version 1.2.0 and will be "
+    "removed in version 1.3.0. Use `RoutingManager` instead.",
+    category=PAIBoxDeprecationWarning,
+)
+class RoutingRoot(RoutingManager):
+    pass
 
 
 def _nearest_multiple_above(a: int, x: int) -> int:
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index c4bf6f54..ad7e94a2 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -3,49 +3,16 @@
 
 import numpy as np
 import pytest
-from paicorelib import LCN_EX, RoutingCoord, RoutingDirection, RoutingLevel
+from paicorelib import LCN_EX, RoutingCoord, RoutingDirection
 from paicorelib import WeightWidth as WW
 
 import paibox as pb
-from paibox.backend.routing import RoutingCluster
 from paibox.backend.types import AxonCoord, AxonSegment, NeuSegment
 from paibox.exceptions import ResourceError
 from paibox.node import NodeList
 from tests.conftest import ParametrizedTestData
 
 
-@pytest.fixture
-def build_example_root():
-    """Example root.
-
-    Structure:
-        L3: root
-        L2_1: L1_1
-        L2_2: L1_2, L1_3, L1_4, L1_5
-    """
-    root = RoutingCluster(RoutingLevel.L3, tag="L3")
-
-    node_l2_1 = RoutingCluster(RoutingLevel.L2, tag="L2_1")
-    node_l2_2 = RoutingCluster(RoutingLevel.L2, tag="L2_2")
-
-    node_l1_1 = RoutingCluster(RoutingLevel.L1, tag="L1_1")
-    node_l1_2 = RoutingCluster(RoutingLevel.L1, tag="L1_2")
-    node_l1_3 = RoutingCluster(RoutingLevel.L1, tag="L1_3")
-    node_l1_4 = RoutingCluster(RoutingLevel.L1, tag="L1_4")
-    node_l1_5 = RoutingCluster(RoutingLevel.L1, tag="L1_5")
-
-    node_l2_1.add_child_to(node_l1_1, RoutingDirection.X0Y0)
-    node_l2_2.add_child_to(node_l1_2, RoutingDirection.X0Y0)
-    node_l2_2.add_child_to(node_l1_3, RoutingDirection.X0Y1)
-    node_l2_2.add_child_to(node_l1_4, RoutingDirection.X1Y0)
-    node_l2_2.add_child_to(node_l1_5, RoutingDirection.X1Y1)
-
-    root.add_child_to(node_l2_1, RoutingDirection.X0Y0)
-    root.add_child_to(node_l2_2, RoutingDirection.X0Y1)
-
-    return root
-
-
 class NetForTest1(pb.Network):
     """INP1 -> S1 -> N1 -> S2 -> N2 -> S3 -> N3"""
 
diff --git a/tests/backend/test_routing.py b/tests/backend/test_routing.py
index 2e1fb2d3..9d3bb8cd 100644
--- a/tests/backend/test_routing.py
+++ b/tests/backend/test_routing.py
@@ -1,19 +1,10 @@
 import random
-from contextlib import nullcontext
 
-import numpy as np
 import pytest
 from paicorelib import Coord, HwConfig, RoutingDirection, RoutingLevel
 
 import paibox as pb
-from paibox.backend.routing import (
-    RoutingCluster,
-    RoutingCoord,
-    RoutingRoot,
-    get_parent,
-    get_unused_lx,
-)
-from paibox.exceptions import RoutingError
+from paibox.backend.routing import RoutingManager, get_unused_lx
 
 from .conftest import gen_random_used_lx
 
@@ -30,263 +21,6 @@
 L0 = RoutingLevel.L0
 
 
-class TestRouterTree:
-    def test_basics(self):
-        root = RoutingCluster(L3, tag="L3")
-
-        cluster_l2_1 = RoutingCluster(L2, tag="L2_1")
-        cluster_l2_2 = RoutingCluster(L2, tag="L2_2")
-        cluster_l2_3 = RoutingCluster(L2, tag="L2_3")
-
-        assert root.add_child(cluster_l2_1) == True
-        assert root.add_child_to(cluster_l2_2, X1Y1) == True
-
-        cluster1 = root.create_child(tag="L2_created")  # X0Y1
-        assert cluster1 is not None
-        assert len(root.children) == 3
-
-        assert root.add_child_to(cluster_l2_3, X1Y1) == False
-        assert len(root.children) == 3
-
-        cluster2 = root.create_child(tag="L2_created2")  # X1Y0
-        assert cluster2 is not None
-        assert len(root.children) == 4
-        assert root.children[X1Y0] == cluster2
-
-        cluster3 = root.create_child(tag="L2_created3")
-        assert cluster3 is None
-
-    def test_clear(self):
-        root = RoutingCluster(L3, tag="L3")
-
-        cluster_l2_1 = RoutingCluster(L2, tag="L2_1")
-        cluster_l2_2 = RoutingCluster(L2, tag="L2_2")
-        cluster_l2_3 = RoutingCluster(L2, tag="L2_3")
-
-        cluster_l1_1 = RoutingCluster(L1, tag="L1_1")
-        cluster_l1_2 = RoutingCluster(L1, tag="L1_2")
-        cluster_l1_3 = RoutingCluster(L1, tag="L1_3")
-
-        assert cluster_l2_1.add_child_to(cluster_l1_1, X0Y0) == True
-        assert cluster_l2_2.add_child_to(cluster_l1_2, X0Y1) == True
-        assert cluster_l2_3.add_child_to(cluster_l1_3, X1Y0) == True
-
-        assert root.add_child_to(cluster_l2_1, X0Y0) == True
-        assert root.add_child_to(cluster_l2_2, X1Y1) == True
-        assert root.add_child_to(cluster_l2_3, X1Y0) == True
-
-        cluster_l2_2.clear()
-        assert len(cluster_l2_2.children) == 0
-
-        root.clear()
-        assert len(root.children) == 0
-
-    def test_remove_child(self, build_example_root):
-        root = build_example_root
-
-        assert root.remove_child(X0Y1, strict=True)
-        assert X0Y1 not in root
-
-        with pytest.raises(RoutingError):
-            root.remove_child(X1Y1, strict=True)
-
-    def test_find_cluster_by_path(self):
-        root = RoutingCluster(L3, tag="L3")
-
-        cluster_l2_1 = RoutingCluster(L2, tag="L2_1")
-        cluster_l2_2 = RoutingCluster(L2, tag="L2_2")
-        cluster_l2_3 = RoutingCluster(L2, tag="L2_3")
-
-        cluster_l1_1 = RoutingCluster(L1, tag="L1_1")
-        cluster_l1_2 = RoutingCluster(L1, tag="L1_2")
-        cluster_l1_3 = RoutingCluster(L1, tag="L1_3")
-
-        assert cluster_l2_1.add_child_to(cluster_l1_1, X0Y0) == True
-        assert cluster_l2_2.add_child_to(cluster_l1_2, X0Y1) == True
-        assert cluster_l2_3.add_child_to(cluster_l1_3, X1Y0) == True
-
-        assert root.add_child_to(cluster_l2_1, X0Y0) == True
-        assert root.add_child_to(cluster_l2_2, X1Y1) == True
-        assert root.add_child_to(cluster_l2_3, X1Y0) == True
-
-        find0 = root[X0Y0]
-        assert find0 == cluster_l2_1
-
-        find1 = root.find_cluster_by_path([X0Y0, X0Y0])
-        assert find1 == cluster_l1_1
-
-        find2 = root.find_cluster_by_path([X0Y0, X0Y1])
-        assert find2 is None
-
-        find3 = root.find_cluster_by_path([X1Y0, X1Y0])
-        assert find3 == cluster_l1_3
-
-        find4 = root.find_cluster_by_path([X1Y1, X1Y0])
-        assert find4 is None
-
-    def test_get_routing_path(self):
-        root = RoutingCluster(L3, tag="L3")
-
-        cluster_l2_1 = RoutingCluster(L2, tag="L2_1")
-        cluster_l2_2 = RoutingCluster(L2, tag="L2_2")
-        cluster_l2_3 = RoutingCluster(L2, tag="L2_3")
-
-        cluster_l1_1 = RoutingCluster(L1, tag="L1_1")
-        cluster_l1_2 = RoutingCluster(L1, tag="L1_2")
-        cluster_l1_3 = RoutingCluster(L1, tag="L1_3")
-        cluster_l1_4 = RoutingCluster(L1, tag="L1_4")
-
-        assert cluster_l2_1.add_child_to(cluster_l1_1, X0Y0) == True
-        assert cluster_l2_2.add_child_to(cluster_l1_2, X0Y1) == True
-        assert cluster_l2_3.add_child_to(cluster_l1_3, X1Y0) == True
-
-        assert root.add_child_to(cluster_l2_1, X0Y0) == True
-        assert root.add_child_to(cluster_l2_2, X1Y1) == True
-        assert root.add_child_to(cluster_l2_3, X1Y0) == True
-
-        assert root.get_routing_path(cluster_l2_1) == [X0Y0]
-        assert root.get_routing_path(cluster_l1_3) == [
-            X1Y0,
-            X1Y0,
-        ]
-
-        assert root.get_routing_path(cluster_l1_3) == [
-            X1Y0,
-            X1Y0,
-        ]
-        assert root.get_routing_path(cluster_l1_4) == None
-
-    def test_create_lx_full_tree(self):
-        root = RoutingCluster(L3, tag="L3")
-
-        cluster_l2_1 = RoutingCluster.create_lx_full_tree(L2, root_tag="L2_1")
-        cluster_l2_2 = RoutingCluster.create_lx_full_tree(L2, root_tag="L2_2")
-        cluster_l2_3 = RoutingCluster.create_lx_full_tree(L2, root_tag="L2_3")
-
-        assert root.add_child(cluster_l2_1) == True
-        assert root.add_child(cluster_l2_2) == True
-
-        assert root.add_child_to(cluster_l2_3, X1Y1, False) == True
-
-        assert len(root.children) == 3
-        assert X1Y0 not in root.children.keys()
-
-    def test_add_L0_for_placing(self):
-        subtree = RoutingCluster.create_routing_tree(L3, 2)
-        assert len(subtree.children) == 2
-
-        n = 6
-        for _ in range(n):
-            subtree.add_L0_for_placing()
-
-        find_l0_1 = subtree.find_leaf_at_level(L0)
-        find_l0_2 = subtree.find_lx_clusters(L0, 0)
-
-        find_l1_1 = subtree.find_lx_clusters(L1, 0)
-        find_l1_2 = subtree.find_lx_clusters(L1, 2)
-        find_l1_3 = subtree.find_lx_clusters(L1, 4)
-        find_l1_4 = subtree.find_leaf_at_level(L1)
-
-        find_l2 = subtree.find_lx_clusters(L2, 0)
-        find_l3 = subtree.find_lx_clusters(L3, 2)
-
-        assert len(find_l0_1) == 0
-        assert len(find_l0_2) == n
-        assert len(find_l1_1) == 8
-        assert len(find_l1_2) == 7
-        assert len(find_l1_3) == 6
-        assert len(find_l1_4) == 6
-        assert len(find_l2) == 2
-        assert len(find_l3) == 1
-
-        assert len(find_l1_1[0].children) == find_l1_1[0].node_capacity
-        assert len(find_l1_1[1].children) == n - len(find_l1_1[0].children)
-
-    def test_create_routing_tree(self):
-        """Test for `create_routing_tree()` & `find_empty_lx_clusters()`."""
-        # A L3-level routing tree.
-        subtree = RoutingCluster.create_routing_tree(L3, 2)
-
-        find_l2 = subtree.find_leaf_at_level(L2)
-        find_l1 = subtree.find_leaf_at_level(L1)
-
-        assert len(find_l2) == 0
-        assert len(find_l1) == 8
-
-        # A L4-level routing tree.
-        subtree = RoutingCluster.create_routing_tree(L4, 1)
-
-        find_l3 = subtree.find_leaf_at_level(L3)
-        find_l2 = subtree.find_leaf_at_level(L2)
-        find_l1 = subtree.find_leaf_at_level(L1)
-
-        assert len(find_l3) == 0
-        assert len(find_l2) == 0
-        assert len(find_l1) == 4 * 4
-
-    def test_add_subtree(self):
-        root = RoutingCluster(L4, tag="L4")
-        subtree = RoutingCluster.create_routing_tree(L3, 2)
-
-        n = 6
-        for _ in range(n):
-            subtree.add_L0_for_placing()
-
-        insert = root.add_subtree(subtree, False)
-
-        assert insert == True
-
-        subtree2 = RoutingCluster.create_routing_tree(L3, 4)
-        insert = root.add_subtree(subtree2, False)
-
-        assert insert == True
-
-        subtree3 = RoutingCluster.create_routing_tree(L3, 1)
-        l2_cluster = subtree3.find_lx_clusters(L2)[0]
-        l2_cluster.tag = "L2_new"
-
-        insert = root.add_subtree(subtree3, False)
-
-        assert insert == True
-
-    def test_get_parent(self):
-        root = RoutingCluster(L3, tag="L3")
-        cluster_l2_1 = RoutingCluster(L2, tag="L2_1")
-        cluster_l1_1 = RoutingCluster(L1, tag="L1_1")
-        cluster_l1_2 = RoutingCluster(L1, tag="L1_2")
-        cluster_l1_3 = RoutingCluster(L1, tag="L1_3")
-
-        assert cluster_l2_1.add_child_to(cluster_l1_1, X0Y0) == True
-        assert cluster_l2_1.add_child_to(cluster_l1_2, X0Y1) == True
-
-        assert root.add_child_to(cluster_l2_1, X0Y0) == True
-
-        parent1 = get_parent(root, cluster_l1_1)
-
-        assert parent1 == cluster_l2_1
-
-        parent2 = get_parent(root, cluster_l1_3)
-        assert parent2 is None
-
-    def test_routing_coord(self):
-        root = RoutingCluster(L3, tag="L3")
-        cluster_l2_1 = RoutingCluster(L2, tag="L2_1")
-        cluster_l1_1 = RoutingCluster(L1, tag="L1_1")
-        cluster_l1_2 = RoutingCluster(L1, tag="L1_2")
-        cluster_l1_3 = RoutingCluster(L1, tag="L1_3")
-        cluster_l0_1 = RoutingCluster(L0, tag="L0_1")
-
-        assert cluster_l1_3.add_child_to(cluster_l0_1, X0Y1) == True
-        assert cluster_l2_1.add_child_to(cluster_l1_1, X0Y0) == True
-        assert cluster_l2_1.add_child_to(cluster_l1_2, X0Y1) == True
-        assert cluster_l2_1.add_child_to(cluster_l1_3, X1Y0) == True
-        assert root.add_child_to(cluster_l2_1, X1Y1) == True
-
-        assert root.routing_coord == RoutingCoord(X0Y0, X0Y0)
-        assert cluster_l1_2.routing_coord == RoutingCoord(X0Y0, X0Y0, X1Y1, X0Y1, ANY)
-        assert cluster_l0_1.routing_coord == RoutingCoord(X0Y0, X0Y0, X1Y1, X1Y0, X0Y1)
-
-
 class TestRoutingGroup:
     def test_RoutingGroup_instance(self, build_example_net1):
         net = build_example_net1
@@ -296,7 +30,7 @@ def test_RoutingGroup_instance(self, build_example_net1):
         mapper.compile()
 
         # 8+5+4, 8+8+4
-        assert mapper.routing_tree.n_core_total >= mapper.n_core_required
+        assert mapper.routing_manager.n_core_total >= mapper.n_core_required
 
     def test_RoutingGroup_instance2(self, monkeypatch, build_example_net2):
         net = build_example_net2
@@ -308,7 +42,7 @@ def test_RoutingGroup_instance2(self, monkeypatch, build_example_net2):
         mapper.build(net)
         mapper.compile()
 
-        assert mapper.routing_tree.n_core_total >= mapper.n_core_required
+        assert mapper.routing_manager.n_core_total >= mapper.n_core_required
 
     def test_RoutingGroup_instance3(self, build_example_net4):
         net = build_example_net4
@@ -319,7 +53,7 @@ def test_RoutingGroup_instance3(self, build_example_net4):
         mapper.compile()
 
         assert len(mapper.core_blocks) == 3
-        assert mapper.routing_tree.n_core_total >= mapper.n_core_required
+        assert mapper.routing_manager.n_core_total >= mapper.n_core_required
 
     def test_RoutingGroup_instance4(self, monkeypatch, build_example_net4):
         net = build_example_net4
@@ -332,174 +66,57 @@ def test_RoutingGroup_instance4(self, monkeypatch, build_example_net4):
         mapper.compile()
 
         assert len(mapper.core_blocks) == 4
-        assert mapper.routing_tree.n_core_total >= mapper.n_core_required
-
-
-class TestRoutingRoot:
-    def test_get_n_lxcluster(self, build_example_root, monkeypatch):
-        monkeypatch.setattr(
-            pb.BACKEND_CONFIG, "target_chip_addr", [Coord(0, 0), Coord(1, 0)]
-        )
-
-        root = RoutingRoot(pb.BACKEND_CONFIG.target_chip_addr)
-
-        assert root[0].include_online == True
-        assert root[1].include_online == True
-        assert root[0].add_subtree(build_example_root, False) == True
-        assert root[1].add_subtree(build_example_root, False) == True
-
-        clusters_l5 = root[0].breadth_of_lx(L5)
-        clusters_l4 = root[0].breadth_of_lx(L4)
-        clusters_l3 = root[0].breadth_of_lx(L3)
-        clusters_l2 = root[1].breadth_of_lx(L2)
-        clusters_l1 = root[1].breadth_of_lx(L1)
-        clusters_l0 = root[1].breadth_of_lx(L0)
-
-        assert clusters_l5 == 1
-        assert clusters_l4 == 1
-        assert clusters_l3 == 1
-        assert clusters_l2 == 2
-        assert clusters_l1 == 5
-        assert clusters_l0 == 0
-
-        assert root.breadth_of_lx(L1) == 5 * 2
-        assert root.breadth_of_lx(L2) == 2 * 2
-
-    @staticmethod
-    def _gen_routing_cluster(n_core: int):
-        from paicorelib import get_routing_consumption
-
-        cost = get_routing_consumption(n_core)
-        level = cost.get_routing_level()
-
-        routing_root = RoutingCluster.create_routing_tree(level, cost[level.value])
-
-        for i in range(cost.n_L0):
-            if i < n_core:
-                if not routing_root.add_L0_for_placing(data=i):
-                    raise RuntimeError
-            else:
-                if not routing_root.add_L0_for_placing(data="occupied"):
-                    raise RuntimeError
-
-        return routing_root
-
-    @staticmethod
-    def _gen_random_cores(n_core: int):
-        n_core_half = n_core // 2
-        cores = []
-
-        for _ in range(n_core_half):
-            cores.append(np.random.randint(1, 300, dtype=int))
-
-        for _ in range(n_core - n_core_half):
-            cores.append(np.random.randint(100, 600, dtype=int))
-
-        return cores
-
-    def test_get_insert_location(self, monkeypatch):
-        clist = [Coord(0, 0)]
-        monkeypatch.setattr(pb.BACKEND_CONFIG, "target_chip_addr", clist)
+        assert mapper.routing_manager.n_core_total >= mapper.n_core_required
 
-        root = RoutingRoot(pb.BACKEND_CONFIG.target_chip_addr)
-
-        cores_required = [100, 200, 100, 100]
-        cores_cost = [1 << (n - 1).bit_length() for n in cores_required]
-
-        for core_incoming, core_req in zip(cores_cost, cores_required):
-            core_loc, chip_idx_loc, routing_path = root.get_insert_location(
-                core_incoming, core_req
-            )
 
+class TestRoutingManager:
     @pytest.mark.parametrize(
-        "cores, expectation",
-        (
-            ([10, 20, 30, 40, 100, 200], nullcontext()),
-            ([5, 10, 20, 100, 500], pytest.raises(RoutingError)),
-        ),
-    )
-    def test_insert_routing_group_1chip(self, cores, expectation):
-        root = RoutingRoot(pb.BACKEND_CONFIG.target_chip_addr)
-
-        with expectation as e:
-            for core in cores:
-                subtree = self._gen_routing_cluster(core)
-                if not root[0].add_subtree(subtree, True):
-                    raise RoutingError("Insert failed.")
-
-        huge_core = 500
-        subtree = self._gen_routing_cluster(huge_core)
-        assert root[0].add_subtree(subtree, True) == False  # Out of resources
-
-    @pytest.mark.parametrize(
-        "cores, expectation",
-        (
+        "chip_list, n_cores, expected",
+        [
+            # expected = [(chip_idx_loc, core offset within that chip), (...), ...]
             (
-                [64, 128, 64],
+                [Coord(0, 0)],
+                [100, 200, 100, 100],
                 [
-                    RoutingCoord(X0Y0, X0Y0),
-                    RoutingCoord(X0Y0, X1Y0),
-                    RoutingCoord(X0Y0, X1Y1),
-                    RoutingCoord(X0Y0, X0Y1),
+                    (0, 0),
+                    (0, 256),
+                    (0, 256 + 256),
+                    (0, 256 + 256 + 128),
                 ],
             ),
-        ),
-    )
-    def test_insert_routing_group_detail(self, cores, expectation):
-        root = RoutingRoot(pb.BACKEND_CONFIG.target_chip_addr)
-        index = 0
-
-        for core in cores:
-            subtree = self._gen_routing_cluster(core)
-            assert root[0].add_subtree(subtree, True) == True
-
-            if len(subtree.children) == 1:
-                assert subtree[X0Y0].routing_coord == expectation[index]
-                index += 1
-            elif len(subtree.children) == 2:
-                assert subtree[X0Y0].routing_coord == expectation[index]
-                index += 1
-                assert subtree[X0Y1].routing_coord == expectation[index]
-                index += 1
-            elif len(subtree.children) == 4:
-                assert subtree[X0Y0].parent.routing_coord == expectation[index]
-                index += 1
-            else:
-                assert False
-
-    @pytest.mark.parametrize(
-        "cores, expectation",
-        (
-            ([200, 400, 600, 800, 1000], nullcontext()),
-            ([80, 100, 240, 490, 500, 490, 1000, 1000], nullcontext()),
-            ([512, 128, 128, 128, 32, 32, 32, 16, 1000, 1000, 1000], nullcontext()),
-            ([200, 400, 600, 800, 1020], pytest.raises(RoutingError)),
-            ([80, 100, 240, 490, 490, 500, 1000, 1000], pytest.raises(RoutingError)),
-            ([200, 400, 600, 800, 200, 400, 200, 300], pytest.raises(RoutingError)),
-        ),
+            (
+                [Coord(0, 0)],
+                [20, 10, 10, 20, 50],
+                [(0, 0), (0, 32), (0, 48), (0, 64), (0, 128)],
+            ),
+            (
+                [Coord(0, 0), Coord(1, 0)],
+                [200, 120, 100, 200, 400, 200, 10],
+                [
+                    (0, 0),
+                    (0, 256),
+                    (0, 256 + 128),
+                    (0, 512),
+                    (1, 0),
+                    (1, 512),
+                    (1, 512 + 256),
+                ],
+            ),
+        ],
     )
-    def test_insert_routing_group_multichip4(self, cores, expectation):
-        chip_list = [Coord(1, 1), Coord(1, 2), Coord(2, 1), Coord(2, 2)]
-        root = RoutingRoot(chip_list)
-
-        subtrees = []
-        for core in cores:
-            subtrees.append(self._gen_routing_cluster(core))
+    def test_get_insert_location(self, chip_list, n_cores, expected, monkeypatch):
+        monkeypatch.setattr(pb.BACKEND_CONFIG, "target_chip_addr", chip_list)
+        root = RoutingManager(pb.BACKEND_CONFIG.target_chip_addr)
+        cores_cost = [1 << (n - 1).bit_length() for n in n_cores]
 
-        n_wasted = [int(np.power(2, np.ceil(np.log2(core))) - core) for core in cores]
-
-        with expectation as e:
-            for i, subtree in enumerate(subtrees):
-                check_hit_online = n_wasted[i] <= HwConfig.N_CORE_ONLINE
-                flag = False
-
-                for chip_root in root.chip_roots:
-                    flag = chip_root.add_subtree(subtree, check_hit_online)
-                    if flag:
-                        break
-
-                if not flag:
-                    raise RoutingError("Insert failed.")
+        for i, (core_incoming, core_req) in enumerate(zip(cores_cost, n_cores)):
+            core_loc, chip_idx_loc, routing_path = root.get_insert_location(
+                core_incoming, core_req
+            )
+            assert expected[i][0] == chip_idx_loc
+            assert (
+                chip_idx_loc * HwConfig.N_CORE_MAX_INCHIP + expected[i][1] == core_loc
+            )
 
 
 @pytest.mark.parametrize("lx", [L4, L3, L2, L1, L0])

From 0b3e725d4677d1f8dcdd1a8e839388f22decf9be Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 28 Oct 2024 10:15:48 +0800
Subject: [PATCH 109/187] =?UTF-8?q?=F0=9F=8E=A8=20sync=20changes,=20typing?=
 =?UTF-8?q?=20&=20format=20improved?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/mapper.py        | 43 +++++++++++++++------------------
 paibox/backend/placement.py     | 41 +++++++++++++++++--------------
 paibox/backend/segment_utils.py |  6 ++---
 paibox/utils.py                 | 15 +++++-------
 tests/backend/test_context.py   |  6 +++++
 tests/backend/test_graphs.py    |  2 --
 tests/backend/test_mapper.py    |  6 ++---
 7 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 06bcc3c2..859d5085 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -22,15 +22,9 @@
     OutputDestConf,
 )
 from .context import _BACKEND_CONTEXT, set_cflag
-from .graphs import (
-    PAIGraph,
-    convert2routing_groups,
-    get_node_degrees,
-    get_succ_cb_by_node,
-    toposort,
-)
+from .graphs import PAIGraph, get_node_degrees, get_succ_cb_by_node, toposort
 from .placement import CoreBlock, aligned_coords, max_lcn_of_cb
-from .routing import RoutingGroup, RoutingRoot
+from .routing import RoutingGroup, RoutingManager
 from .types import NeuSegment, NodeDegree, NodeType, SourceNodeType, is_iw8
 
 __all__ = ["Mapper"]
@@ -59,12 +53,13 @@ def __init__(self) -> None:
 
         self.n_core_required = 0
         self.n_core_occupied = 0
-        self.routing_tree = RoutingRoot(chip_list=_BACKEND_CONTEXT["target_chip_addr"])
+        self.routing_manager = RoutingManager(
+            chip_list=_BACKEND_CONTEXT["target_chip_addr"]
+        )
 
         self.clear()
 
     def clear(self) -> None:
-        self.routing_tree.clear()
         self.graph.clear()
 
         self.core_blocks.clear()
@@ -108,7 +103,7 @@ def compile(
         core_estimate_only: bool = False,
         weight_bit_optimization: bool = True,
         grouping_optim_target: Literal["latency", "core", "both"] = "both",
-        no_twisted_branch: bool = True,
+        no_twisted_branch: bool = False,
         multicast_optim: Union[bool, Sequence[NodeType]] = False,
         **kwargs,
     ) -> GraphInfo:
@@ -123,7 +118,7 @@ def compile(
                 `core` or `both`, which respectively represent the optimization goal of delay/throughput,       \
                 occupied cores, or both. The default is specified by the corresponding compilation option in the\
                 backend configuration item. Default is 'both'.
-            - no_twisted_branch: when parsing the network topology, whether or not to prohibit intersecting     \
+            - no_twisted_branch (for advanced use): when parsing the network topology, whether or not to prohibit intersecting     \
                 branch structures will cause such structures to be processed. For example:
 
                 I -> A -> B -> C
@@ -135,7 +130,7 @@ def compile(
                 I -> A -> B -> C
                   -> A'------>
 
-                Default is true.
+                Default is false.
 
             - multicast_optim (in dev): whether to perform multicast optimization. If true, the optimization is \
                 performed on all nodes in the network. If a node list is passed, the optimization is attempted  \
@@ -207,17 +202,16 @@ def untwist_branch_nodes(self) -> None:
 
     def build_core_blocks(self) -> None:
         """Build core blocks based on partitioned edges."""
-        route_groups = self.graph.graph_partition()
+        merged_sgrps = self.graph.graph_partition()
 
-        for route_group in route_groups:
-            route_group.dump()
-            self.routing_groups.append(RoutingGroup(route_group))
+        for msgrp in merged_sgrps:
+            self.routing_groups.append(RoutingGroup.build(msgrp))
 
         for rg in self.routing_groups:
-            self.core_blocks.extend(rg.core_blocks)
+            self.core_blocks += rg.core_blocks
 
         for cur_cb in self.core_blocks:
-            succ_cbs = []
+            succ_cbs: list[CoreBlock] = []
             # cur_cb == cb is possible
             for cb in self.core_blocks:
                 if any(d for d in cur_cb.dest if d in cb.source):
@@ -305,8 +299,8 @@ def coord_assign(self, core_estimate_only: bool) -> None:
             )
 
         for rg in self.routing_groups:
-            rg.sub_routing_group.set_config()
-            rg.sub_routing_group.dump()
+            rg.set_core_required()
+
         # Optimize the order of routing groups
         # self.routing_groups = reorder_routing_groups(self.succ_routing_groups)
         self.routing_groups = toposort(self.succ_routing_groups)
@@ -318,13 +312,14 @@ def coord_assign(self, core_estimate_only: bool) -> None:
 
         if core_estimate_only:
             return None
-        elif n_core_required > n_avail_cores:
+
+        if n_core_required > n_avail_cores:
             raise ResourceError(
                 OUT_OF_CORE_RESOURCE_TEXT.format(n_avail_cores, n_core_required)
             )
 
         for rg in self.routing_groups:
-            self.routing_tree.place_routing_group(rg)
+            self.routing_manager.place_routing_group(rg)
 
         # Calculate the consumption of occupied physical cores.
         if (
@@ -373,7 +368,7 @@ def config_export(self) -> GraphInfo:
                 "name": self.graph.graph_name_repr,
                 "clk_en_L2": get_clk_en_L2_dict(
                     _BACKEND_CONTEXT["target_chip_addr"],
-                    self.routing_tree.used_L2_clusters,
+                    self.routing_manager.used_L2_clusters,
                 ),
             },
         )
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index ec24cb57..07dda054 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -1,6 +1,5 @@
 import math
 import warnings
-from functools import cached_property
 from typing import ClassVar, Literal, Optional, overload
 
 import numpy as np
@@ -11,9 +10,9 @@
 from paibox.components import FullConnectedSyn, Neuron
 from paibox.exceptions import (
     GraphBuildError,
-    NotSupportedError,
     ResourceError,
     TruncationWarning,
+    NotSupportedError,
 )
 from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
@@ -36,18 +35,18 @@
     NeuSegment,
     NeuSegOfCoreBlock,
     NeuSegOfCorePlm,
-    RouteGroup,
     SourceNodeType,
     WRAMPackedType,
     WRAMUnpackedType,
     is_iw8,
+    MergedSuccGroup,
 )
 
 
 class CoreBlock(CoreAbstract):
 
     _parents: tuple[FullConnectedSyn, ...]
-    _routing_id: int
+    _routing_id: int  # TODO will be deprecated
     seed: int
     """Random seed, legal integer, no more than uint64."""
     _lcn_ex: LCN_EX
@@ -100,6 +99,7 @@ def __init__(
         self.axon_segments = dict()
         self.neuron_segs_of_cb = []
         self.ordered_axons: list[SourceNodeType] = []
+        """Axons in multicast + private order."""
 
     def group_neurons(
         self, optim_target: Literal["latency", "core", "both"] = "both"
@@ -152,7 +152,9 @@ def _n_axon2lcn_ex(self) -> LCN_EX:
 
         return LCN_EX(lcn)
 
-    def assign(self, allocated: list[Coord], chip_coord: Coord) -> list[Coord]:
+    def assign_coord(
+        self, chip_coord: Coord, allocated: list[Coord]
+    ) -> tuple[list[Coord], list[Coord]]:
         self.core_coords = allocated
         self.chip_coord = chip_coord
         return allocated, []
@@ -283,27 +285,27 @@ def n_neuron_of_plm(self) -> list[int]:
 
         # Get #N of neurons on each `CorePlacement` according to the
         # maximum address required of neuron segments on each `CorePlacement`.
-        assert [] not in self.neuron_segs_of_cb  # TODO if it never happens, remove it.
-
         return [
             sum(seg.n_neuron for seg in neuron_segs)
             for neuron_segs in self.neuron_segs_of_cb
         ]
 
-    def group_axons(self, multicast_axons: list[SourceNodeType] = list()) -> None:
+    def group_axons(self, multicast_axons: list[SourceNodeType] = []) -> None:
+        """Group the axons, including the private & the multicast parts.
+
+        NOTE: Take the union of the private axons & the multicast axons, with the multicast axons placed first, \
+            followed by the axons that are in the private part but not in the multicast part.
+        """
         if not self._lcn_locked:
-            raise GraphBuildError("get axon segments after 'lcn_ex' is locked.")
-        # Remove shared axons
-        axons = [ax for ax in self.axons if ax not in multicast_axons]
-        # More axons may be added to the axon list
-        axons = multicast_axons + axons
+            raise GraphBuildError("group axons after 'lcn_ex' is locked.")
+
+        axons = multicast_axons + [ax for ax in self.axons if ax not in multicast_axons]
         self.ordered_axons = axons
-        print(f"origin: {len(self.axons)}, ordered: {len(self.ordered_axons)}")
         self.axon_segments = get_axon_segments(
             self.ordered_axons, self.n_timeslot, self.n_fanin_base
         )
 
-    @cached_property
+    @property
     def raw_weight_of_dest(self) -> list[WeightType]:
         """Merge and then split the weight matrix according to the grouping of neurons."""
         # The concatenated weight for each destination node.
@@ -391,23 +393,26 @@ def build(
         return cls(*synapses, routing_id=routing_id, mode=rt_mode, seed=seed)
 
     @classmethod
-    def build_core_blocks(cls, route_group: RouteGroup) -> list["CoreBlock"]:
+    def build_core_blocks(cls, route_group: MergedSuccGroup) -> list["CoreBlock"]:
         core_blocks: list[CoreBlock] = []
         succ_nodes = list(route_group.nodes)
         mode = succ_nodes[0].mode
         if any(node.mode != mode for node in succ_nodes):
             raise NotSupportedError("mixed mode is not supported.")
+
+        # TODO More constraints for nodes can be called here.
         idx_of_sg = GraphNodeConstrs.tick_wait_attr_constr(succ_nodes)
-        route_group.set_inputs()
         if len(idx_of_sg) == 0:
             idx_of_sg = [list(range(len(succ_nodes)))]
 
         for idx in idx_of_sg:
             succ_edges: set[EdgeType] = set()
             for i in idx:
-                succ_edges.update(route_group.inputs[succ_nodes[i]])
+                succ_edges.update(route_group.outputs[succ_nodes[i]])
+
             core_block = CoreBlock.build(*succ_edges, routing_id=0, rt_mode=mode)
             core_blocks.append(core_block)
+
         return core_blocks
 
     @classmethod
diff --git a/paibox/backend/segment_utils.py b/paibox/backend/segment_utils.py
index bcc492bf..375e4bc0 100644
--- a/paibox/backend/segment_utils.py
+++ b/paibox/backend/segment_utils.py
@@ -285,9 +285,9 @@ def _seg_alloc(axon: SourceNodeType, offset: int) -> tuple[AxonSegment, int]:
     offset = 0
     axon_segments = dict()
 
-    for axon in axons:
-        segment, offset = _seg_alloc(axon, offset)
-        axon_segments[axon] = segment
+    for ax in axons:
+        segment, offset = _seg_alloc(ax, offset)
+        axon_segments[ax] = segment
 
     return axon_segments
 
diff --git a/paibox/utils.py b/paibox/utils.py
index 08c9ec59..6c43864b 100644
--- a/paibox/utils.py
+++ b/paibox/utils.py
@@ -39,16 +39,13 @@ def count_unique_elem(obj: Iterable[Any]) -> int:
 _T = TypeVar("_T")
 
 
-def merge_unique_ordered(lst1: list[_T], lst2: list[_T]) -> list[_T]:
-    seen = set()
-    result = []
-
-    for item in lst1 + lst2:
-        if item not in seen:
-            seen.add(item)
-            result.append(item)
+def merge_unique_ordered(*lst: list[_T]) -> list[_T]:
+    """Merge lists, keeping the original order of elements and removing duplicates."""
+    total = []
+    for l in lst:
+        total.extend(l)
 
-    return result
+    return list(dict.fromkeys(total))
 
 
 def check_attr_same(obj: Sequence[Any], attr: str) -> bool:
diff --git a/tests/backend/test_context.py b/tests/backend/test_context.py
index 697267f4..3a65403a 100644
--- a/tests/backend/test_context.py
+++ b/tests/backend/test_context.py
@@ -48,4 +48,10 @@ def test_backend_context_add_chip_addr(monkeypatch):
     )
 
     pb.BACKEND_CONFIG.add_chip_addr((3, 4), (10, 10))
+    # The order of the chip addresses is kept
+    assert pb.BACKEND_CONFIG.target_chip_addr == [
+        Coord(1, 0),
+        Coord(10, 10),
+        Coord(3, 4),
+    ]
     assert pb.BACKEND_CONFIG["target_chip_addr"][2] == Coord(3, 4)
diff --git a/tests/backend/test_graphs.py b/tests/backend/test_graphs.py
index 3cd372f6..f6cdb2ef 100644
--- a/tests/backend/test_graphs.py
+++ b/tests/backend/test_graphs.py
@@ -258,9 +258,7 @@ def test_untwist_branch_nodes1(
 
         mapper = pb.Mapper()
         mapper.build(net)
-
         mapper.compile(no_twisted_branch=no_twisted_branch)
-
         mapper.export(fp=ensure_dump_dir)
 
         if no_twisted_branch:
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index ea0defce..0c8958f9 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -256,11 +256,11 @@ def __init__(self):
 
         assert graph_info["n_core_occupied"] == n_networks
 
-        rtotal = sum(mapper.routing_tree.n_core_per_chip)
-        r1 = mapper.routing_tree.n_core_per_chip[0]
+        rtotal = sum(mapper.routing_manager.n_core_per_chip)
+        r1 = mapper.routing_manager.n_core_per_chip[0]
 
         if n_networks > 1008:
-            r2 = mapper.routing_tree.n_core_per_chip[1]
+            r2 = mapper.routing_manager.n_core_per_chip[1]
             assert rtotal == r1 + r2
             assert r1 == 1008
             assert r2 == n_networks - 1008

From 2e0574242d7dde93f0132e2bf3a1591091a0faa1 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 28 Oct 2024 10:16:18 +0800
Subject: [PATCH 110/187] =?UTF-8?q?=E2=9C=85=20add=20tests=20for=20`TestMe?=
 =?UTF-8?q?rgedSuccGroup`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_types.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 tests/backend/test_types.py

diff --git a/tests/backend/test_types.py b/tests/backend/test_types.py
new file mode 100644
index 00000000..9996dbb4
--- /dev/null
+++ b/tests/backend/test_types.py
@@ -0,0 +1,30 @@
+import paibox as pb
+from paibox.backend.types import MergedSuccGroup, SuccGroup
+
+
+class TestMergedSuccGroup:
+
+    def test_MergedSuccGroup_inputs(self):
+        """
+        n1 -> s1 -> n2
+           -> s2 -> n3
+        n4 -> s3 ->
+           -> s4 -> n5
+        """
+        n1 = pb.ANNNeuron(1)
+        n2 = pb.ANNNeuron(1)
+        n3 = pb.ANNNeuron(1)
+        n4 = pb.ANNNeuron(1)
+        n5 = pb.ANNNeuron(1)
+        s1 = pb.FullConn(n1, n2)
+        s2 = pb.FullConn(n1, n3)
+        s3 = pb.FullConn(n4, n3)
+        s4 = pb.FullConn(n4, n5)
+
+        sgrp1 = SuccGroup(n1, [n2, n3], [s1, s2])
+        sgrp2 = SuccGroup(n4, [n3, n5], [s3, s4])
+
+        msgrp = MergedSuccGroup(sgrp1, sgrp2)
+
+        # The order of the keys does not matter
+        assert set(msgrp.outputs.keys()) == set([n2, n3, n5])

From dfffed79f03846ae66884513708b258691e0df92 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 28 Oct 2024 04:50:32 +0000
Subject: [PATCH 111/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/backend/placement.py | 4 ++--
 paibox/backend/routing.py   | 8 ++++----
 paibox/backend/types.py     | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 07dda054..f092b6d4 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -10,9 +10,9 @@
 from paibox.components import FullConnectedSyn, Neuron
 from paibox.exceptions import (
     GraphBuildError,
+    NotSupportedError,
     ResourceError,
     TruncationWarning,
-    NotSupportedError,
 )
 from paibox.types import WEIGHT_DTYPE, WeightType
 from paibox.utils import check_attr_same
@@ -32,6 +32,7 @@
     CoreAbstract,
     DestNodeType,
     EdgeType,
+    MergedSuccGroup,
     NeuSegment,
     NeuSegOfCoreBlock,
     NeuSegOfCorePlm,
@@ -39,7 +40,6 @@
     WRAMPackedType,
     WRAMUnpackedType,
     is_iw8,
-    MergedSuccGroup,
 )
 
 
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index f2084741..e98189d4 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -11,10 +11,10 @@
 from paicorelib.routing_defs import MAX_ROUTING_PATH_LENGTH
 
 from paibox.exceptions import (
-    ResourceError,
-    RoutingError,
     GraphBuildError,
     PAIBoxDeprecationWarning,
+    ResourceError,
+    RoutingError,
 )
 
 from .conf_types import CorePlmConfInChip
@@ -57,7 +57,7 @@ def __init__(
         self.routing_elems: list[Union[CoreBlock, "RoutingGroup"]] = (
             unordered_cb + ordered_rgrp
         )
-        self.offset: list[int] = [] # TODO Change a name
+        self.offset: list[int] = []  # TODO Change a name
         self.n_core_required: int = 0
         """The actual number of cores required by the routing group."""
         self.n_tail_waste: int = 0
@@ -322,7 +322,7 @@ def place_routing_group(
     ) -> tuple[list[Coord], list[Coord]]:
         """Place a routing group in the chip list. Assign each core blocks with routing coordinates &   \
             make sure they are routable.
-        
+
         Returns: a tuple of lists of assigned and wasted coordinates.
         """
         # for cb in rgrp:
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 74626759..2463cf72 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -1,6 +1,6 @@
-from collections import defaultdict
 import sys
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum, auto, unique
 from typing import Any, NamedTuple, Union

From 3e31c64ec540d405de64bbf398ad2892bc53c3f5 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 29 Oct 2024 19:22:57 +0800
Subject: [PATCH 112/187] =?UTF-8?q?=E2=9C=A8=20feat(backend):=20exporting?=
 =?UTF-8?q?=20graph=20info=20to=20json=20file,=20including=20'clk=5Fen=5FL?=
 =?UTF-8?q?2'=20&=20'target=5Fchip=5Faddr'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_exporting.py     | 57 +++++++++++++++++++++++++++-
 paibox/backend/conf_types.py         | 19 ++++++----
 paibox/backend/context.py            |  2 +
 paibox/backend/mapper.py             | 19 ++++------
 tests/backend/test_conf_exporting.py |  2 +-
 5 files changed, 76 insertions(+), 23 deletions(-)

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
index eee50e45..347c171b 100644
--- a/paibox/backend/conf_exporting.py
+++ b/paibox/backend/conf_exporting.py
@@ -17,6 +17,7 @@
     CoreConf,
     CorePlmConf,
     FrameArrayType,
+    GraphInfo,
     InputNodeConf,
     NeuronConfig,
     OutputDestConf,
@@ -42,6 +43,7 @@
     "export_output_conf_json",
     "export_neuconf_json",
     "export_core_plm_conf_json",
+    "export_graph_info",
     "export_used_L2_clusters",
     "get_clk_en_L2_dict",
 ]
@@ -236,13 +238,20 @@ def export_input_conf_json(input_conf_info: InputNodeConf, fp: Path) -> None:
 
 def export_output_conf_json(output_conf_info: OutputDestConf, fp: Path) -> None:
     _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["output_conf_json"])
+    _valid_conf = {}
+
+    for dest, dest_info in output_conf_info.items():
+        _valid_conf[dest] = {}
+        for k, v in dest_info.items():
+            _valid_conf[dest][str(k)] = v
+
     if _USE_ORJSON:
         with open(_full_fp, "wb") as f:
             f.write(
                 orjson.dumps(
-                    output_conf_info,
+                    _valid_conf,
                     default=PAIConfigJsonDefault,
-                    option=orjson.OPT_NON_STR_KEYS | orjson.OPT_INDENT_2,
+                    option=orjson.OPT_INDENT_2,
                 )
             )
     else:
@@ -294,6 +303,50 @@ def export_core_plm_conf_json(
             json.dump(_valid_conf, f, indent=2)
 
 
+def export_aux_gh_info(gh_info: GraphInfo, fp: Path, export_clk_en_L2: bool) -> None:
+    _full_fp = _with_suffix_json(fp, _BACKEND_CONTEXT["graph_info_json"])
+    aux_gh_info_dict = {
+        "name": gh_info["name"],
+        "n_core_required": gh_info["n_core_required"],
+        "n_core_occupied": gh_info["n_core_occupied"],
+        "layer_num": gh_info["inherent_timestep"],
+    }
+
+    if misc := gh_info.get("misc"):
+        aux_gh_info_dict["misc"] = {}
+        # Export the serial port data of the L2 cluster clocks
+        if export_clk_en_L2 and (clk_en_L2_dict := misc.get("clk_en_L2")):
+            # dict[ChipCoord, list[int]]
+            aux_gh_info_dict["misc"]["clk_en_L2"] = {
+                str(k): v for k, v in clk_en_L2_dict.items()
+            }
+        if lst := misc.get("target_chip_list"):  # list of ChipCoord
+            aux_gh_info_dict["misc"]["target_chip_list"] = [str(i) for i in lst]
+
+    if _USE_ORJSON:
+        with open(_full_fp, "wb") as f:
+            f.write(orjson.dumps(aux_gh_info_dict, option=orjson.OPT_INDENT_2))
+    else:
+        with open(_full_fp, "w") as f:
+            json.dump(aux_gh_info_dict, f, indent=2)
+
+
+def export_graph_info(
+    gh_info: GraphInfo,
+    fp: Path,
+    export_clk_en_L2: bool,
+    export_core_placements: bool = False,
+) -> None:
+    # Export the configurations of input nodes
+    export_input_conf_json(gh_info["input"], fp)
+    # Export the configurations of output destinations
+    export_output_conf_json(gh_info["output"], fp)
+    export_aux_gh_info(gh_info, fp, export_clk_en_L2)
+
+    if export_core_placements:
+        export_core_plm_conf_json(gh_info["members"], fp)
+
+
 def export_used_L2_clusters(
     clk_en_L2_dict: dict[ChipCoord, list[int]], fp: Path, fname: str = "used_L2"
 ) -> None:
diff --git a/paibox/backend/conf_types.py b/paibox/backend/conf_types.py
index 5259b1e5..a8c5f7f6 100644
--- a/paibox/backend/conf_types.py
+++ b/paibox/backend/conf_types.py
@@ -9,7 +9,6 @@
     LCN_EX,
     ChipCoord,
     Coord,
-    CoordAddr,
     InputWidthFormat,
     MaxPoolingEnable,
     NeuronAttrs,
@@ -27,6 +26,11 @@
 else:
     from typing_extensions import TypeAlias
 
+if sys.version_info >= (3, 11):
+    from typing import NotRequired
+else:
+    from typing_extensions import NotRequired
+
 from paibox.components import Neuron
 
 from .types import AxonCoord, NeuSegment, NodeName, WRAMPackedType
@@ -276,7 +280,7 @@ def to_json(self) -> dict[str, Any]:
 
 
 InputNodeConf: TypeAlias = dict[NodeName, InputNeuronDest]
-OutputDestConf: TypeAlias = dict[NodeName, dict[CoordAddr, NeuronDestInfo]]
+OutputDestConf: TypeAlias = dict[NodeName, dict[Coord, NeuronDestInfo]]
 CorePlmConfInChip: TypeAlias = dict[Coord, CorePlmConfig]
 CorePlmConf: TypeAlias = dict[ChipCoord, CorePlmConfInChip]
 CoreConfInChip: TypeAlias = dict[Coord, CoreConfig]
@@ -284,11 +288,10 @@ def to_json(self) -> dict[str, Any]:
 
 
 class GraphInfo(TypedDict):
-    """Information of compiled graph.
-
-    TODO Optimize the data structure
-    """
+    """Information of graph after compilation."""
 
+    name: str
+    """Name of the graph."""
     input: InputNodeConf
     output: OutputDestConf
     members: CorePlmConf
@@ -297,5 +300,5 @@ class GraphInfo(TypedDict):
     """The actual used cores."""
     n_core_occupied: int
     """The occupied cores, including used & wasted."""
-    misc: dict[str, Any]
-    """Miscellaneous information."""
+    misc: NotRequired[dict[str, Any]]
+    """Miscellaneous information. Not required."""
diff --git a/paibox/backend/context.py b/paibox/backend/context.py
index f0c7e81d..64cd1a43 100644
--- a/paibox/backend/context.py
+++ b/paibox/backend/context.py
@@ -14,6 +14,7 @@
 DEFAULT_CORE_PARAMS_CONF_JSON = "core_params"
 DEFAULT_INPUT_CONF_JSON = "input_proj_info"
 DEFAULT_OUTPUT_CONF_JSON = "output_dest_info"
+DEFAULT_GRAPH_INFO_JSON = "graph_info"
 
 
 class _BackendContext(_Context):
@@ -25,6 +26,7 @@ class _BackendContext(_Context):
         "core_conf_json": DEFAULT_CORE_PARAMS_CONF_JSON,  # RO mostly
         "input_conf_json": DEFAULT_INPUT_CONF_JSON,  # RO mostly
         "output_conf_json": DEFAULT_OUTPUT_CONF_JSON,  # RO mostly
+        "graph_info_json": DEFAULT_GRAPH_INFO_JSON,  # RO mostly
         "cflags": dict(),  # R/W
     }
 
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 859d5085..045e33d5 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -182,13 +182,13 @@ def compile(
 
         if core_estimate_only:
             return GraphInfo(
+                name=self.graph.graph_name_repr,
                 input={},
                 output={},
                 members={},
                 inherent_timestep=self.graph.inherent_timestep,
                 n_core_required=self.n_core_required,
                 n_core_occupied=0,
-                misc={"name": self.graph.graph_name_repr},
             )
 
         """Allocate the core blocks to the core placments."""
@@ -358,6 +358,7 @@ def config_export(self) -> GraphInfo:
         output_dest_info = self._member_cb_and_onode_config_export()
 
         _graph_info = GraphInfo(
+            name=self.graph.graph_name_repr,
             input=input_nodes_info,
             output=output_dest_info,
             members=self.core_plm_config,  # The configuration of physical cores is in `core_plm_config`
@@ -365,11 +366,11 @@ def config_export(self) -> GraphInfo:
             n_core_required=self.n_core_required,
             n_core_occupied=self.n_core_occupied,
             misc={
-                "name": self.graph.graph_name_repr,
                 "clk_en_L2": get_clk_en_L2_dict(
                     _BACKEND_CONTEXT["target_chip_addr"],
                     self.routing_manager.used_L2_clusters,
                 ),
+                "target_chip_list": _BACKEND_CONTEXT.target_chip_addr,
             },
         )
 
@@ -539,7 +540,7 @@ def _member_onode_cb_config_export(
                         output_core_coord=cur_ocoord,
                         axon_addr_offset=output_axon_offset,
                     )
-                    output_dest_info[neu_seg.target.name][core_plm.coord.address] = (
+                    output_dest_info[neu_seg.target.name][core_plm.coord] = (
                         core_plm.neu_configs[neu_seg.target].neuron_dest_info
                     )
 
@@ -564,7 +565,7 @@ def _onode_cb_config_export(
                     output_core_coord=cur_ocoord,
                     axon_addr_offset=output_axon_offset,
                 )
-                output_dest_info[neu_seg.target.name][core_plm.coord.address] = (
+                output_dest_info[neu_seg.target.name][core_plm.coord] = (
                     core_plm.neu_configs[neu_seg.target].neuron_dest_info
                 )
 
@@ -617,14 +618,8 @@ def export(
             # Export the parameters of occupied cores
             export_core_params_json(self.core_params, _fp)
 
-        # Export the configurations of input nodes
-        export_input_conf_json(self.graph_info["input"], _fp)
-        # Export the configurations of output destinations
-        export_output_conf_json(self.graph_info["output"], _fp)
-
-        # Export the serial port data of the L2 cluster clocks
-        if export_clk_en_L2:
-            export_used_L2_clusters(self.graph_info["misc"]["clk_en_L2"], _fp)
+        # Export the graph information
+        export_graph_info(self.graph_info, _fp, export_clk_en_L2)
 
         return config_dict
 
diff --git a/tests/backend/test_conf_exporting.py b/tests/backend/test_conf_exporting.py
index 143c4c0f..6d6b0a4a 100644
--- a/tests/backend/test_conf_exporting.py
+++ b/tests/backend/test_conf_exporting.py
@@ -174,7 +174,7 @@ def test_export_input_conf_json(self, ensure_dump_dir, n_neuron):
 
     @pytest.mark.parametrize("n_neuron", [100, 200, 300])
     def test_export_output_conf_json(self, ensure_dump_dir, n_neuron):
-        oconf = {"n1": {0: _gen_random_neuron_dest_info(n_neuron)}}
+        oconf = {"n1": {Coord(3, 2): _gen_random_neuron_dest_info(n_neuron)}}
         export_output_conf_json(oconf, ensure_dump_dir)
 
     @pytest.mark.parametrize("n_neuron", [100, 200, 300])

From d1e699c79113b783d93b4611b3b00ebc77584f6a Mon Sep 17 00:00:00 2001
From: KafCoppelia <69038090+KafCoppelia@users.noreply.github.com>
Date: Wed, 30 Oct 2024 17:38:06 +0800
Subject: [PATCH 113/187] Test: add test cases & doc (#133)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Test: add some new cases (#132)

* add some new cases

* Change test case number.

* :rotating_light: auto fix by pre-commit hooks

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* 📝 doc: update on-board test doc

* :rotating_light: auto fix by pre-commit hooks

---------

Co-authored-by: yang1556 <92725391+yang1556@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 tests/onboard/README.md       | 167 ++++++++-
 tests/onboard/test_onboard.py | 645 ++++++++++++++++++++++++++++------
 2 files changed, 684 insertions(+), 128 deletions(-)

diff --git a/tests/onboard/README.md b/tests/onboard/README.md
index c79cb5a8..65e86190 100644
--- a/tests/onboard/README.md
+++ b/tests/onboard/README.md
@@ -2,6 +2,17 @@
 
 ## ANN权重映射
 
+|         测试项目          | 结果 |    备注    |
+| :-----------------------: | :--: | :--------: |
+| [001 单层](#001-单层w8e1) |  ✅  |            |
+| [002 单层](#002-单层w8e4) |  ✅  |            |
+| [003 单层](#003-单层w2e2) |  ✅  |            |
+| [004 单层](#004-单层w4e2) |  ✅  |            |
+| [005 多层-1](#005-多层-1) |  ✅  |            |
+| [006 多层-2](#006-多层-2) |  ✅  |            |
+| [007 单层](#007-单层w4e1) |  ❌  | 芯片不输出 |
+| [008 多层-1](#008-多层-1) |  ❌  | 芯片不输出 |
+
 ### 无神经元在WRAM
 
 #### 001 单层W=8E=1
@@ -85,7 +96,7 @@
 
 ### 有神经元在WRAM
 
-#### 007 单层W=4E=1 \*芯片不输出
+#### 007 单层W=4E=1
 
 测试设置：
 
@@ -98,7 +109,7 @@
 1. 在输出数据中，每个时刻均有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等
 
-#### 008 多层-1 \*芯片不输出
+#### 008 多层-1
 
 测试设置：
 
@@ -113,6 +124,10 @@
 
 ## SNN算子
 
+|         测试项目          | 结果 | 备注 |
+| :-----------------------: | :--: | :--: |
+| [001 Conv1d](#001-conv1d) |  ✅  |      |
+
 ### Conv1d
 
 #### 001 Conv1d
@@ -130,6 +145,23 @@
 
 ## 半折叠算子
 
+|                      测试项目                       | 结果 |    备注    |
+| :-------------------------------------------------: | :--: | :--------: |
+|    [001 Conv2dSemiFolded](#001-conv2dsemifolded)    |  ✅  |            |
+|    [002 Conv2dSemiFolded](#002-conv2dsemifolded)    |  ❌  | 不完全相等 |
+|    [003 Conv2dSemiFolded](#003-conv2dsemifolded)    |      |            |
+|    [004 Conv2dSemiFolded](#004-conv2dsemifolded)    |      |            |
+|    [005 Conv2dSemiFolded](#005-conv2dsemifolded)    |      |            |
+|    [006 Conv2dSemiFolded](#006-conv2dsemifolded)    |  ❌  | 不完全相等 |
+|    [007 Conv2dSemiFolded](#007-conv2dsemifolded)    |      |            |
+|    [008 Conv2dSemiFolded](#008-conv2dsemifolded)    |      |            |
+|    [009 Conv2dSemiFolded](#009-conv2dsemifolded)    |      |            |
+| [010 MaxPool2dSemiFolded](#010-maxpool2dsemifolded) |      |            |
+| [011 AvgPool2dSemiFolded](#011-avgpool2dsemifolded) |  ✅  |            |
+| [012 Conv2dSemiFoldedNet](#012-conv2dsemifoldednet) |      |            |
+| [013 Conv2dSemiFoldedNet](#013-conv2dsemifoldednet) |      |            |
+|    [014 CNNSemiFoldedNet](#014-cnnsemifoldednet)    |      |            |
+
 ### 单层
 
 #### 001 Conv2dSemiFolded
@@ -137,6 +169,7 @@
 测试设置：
 
 - 一层半折叠2d卷积
+- 权重：随机权重
 - 输入尺寸：(1, 64, 64)
 - 卷积核尺寸：(4, 1, 7, 7)
 - stride：1
@@ -146,14 +179,67 @@
 
 检查：
 
-1. 在输出数据中，每个时刻均有有效数据
-2. 有效输出数据应与参考输出在每个时间步相等
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
 
 #### 002 Conv2dSemiFolded
 
 测试设置：
 
 - 一层半折叠2d卷积
+- 权重：随机权重
+- 输入尺寸：(1, 8, 8)
+- 卷积核尺寸：(1, 1, 2, 2)
+- stride：2
+- padding：0
+- 输出尺寸：(1, 4, 4)
+- 运行时间步：16，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 注意和003对比
+
+#### 003 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 权重：全1
+- 输入尺寸：(1, 8, 8)
+- 卷积核尺寸：(1, 1, 2, 2)
+- stride：2
+- padding：0
+- 输出尺寸：(1, 4, 4)
+- 运行时间步：16，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 注意检查监测数据的计算；出错时建议对照输入数据逐项核对
+
+#### 004 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 权重：全1
+- 输入尺寸：(8, 64, 64)
+- 卷积核尺寸：(4, 8, 7, 7)
+- stride：2
+- padding：0
+- 输出尺寸：(4, 29, 29)
+- 运行时间步：65，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 注意检查监测数据的计算；出错时建议对照输入数据逐项核对
+
+#### 005 Conv2dSemiFolded
+
+- 一层半折叠2d卷积
+- 权重：随机非负权重（取值范围 0~4）
 - 输入尺寸：(8, 64, 64)
 - 卷积核尺寸：(4, 8, 7, 7)
 - stride：2
@@ -166,24 +252,79 @@
 1. 在每个时间步的输出数据中，应有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
 
-#### 003 Conv2dSemiFolded
+#### 006 Conv2dSemiFolded
 
 测试设置：
 
 - 一层半折叠2d卷积
-- 输入尺寸：(8, 64, 64)
+- 权重：随机权重
+- 输入尺寸：(1, 8, 8)
+- 卷积核尺寸：(1, 1, 3, 3)
+- stride：1
+- padding：1
+- 输出尺寸：(1, 8, 8)
+- 运行时间步：10，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 注意和007对比
+
+#### 007 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 权重：全1
+- 输入尺寸：(1, 8, 8)
+- 卷积核尺寸：(1, 1, 3, 3)
+- stride：1
+- padding：1
+- 输出尺寸：(1, 8, 8)
+- 运行时间步：10，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 注意检查监测数据的计算；出错时建议对照输入数据逐项核对
+
+#### 008 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 权重：全1
+- 输入尺寸：(8, 32, 32)
 - 卷积核尺寸：(4, 8, 3, 3)
 - stride：1
 - padding：1
-- 输出尺寸：(4, 64, 64)
-- 运行时间步：65，监测输出
+- 输出尺寸：(4, 32, 32)
+- 运行时间步：35，监测输出
+
+检查：
+
+1. 在每个时间步的输出数据中，应有有效数据
+2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
+
+#### 009 Conv2dSemiFolded
+
+测试设置：
+
+- 一层半折叠2d卷积
+- 权重：随机权重
+- 输入尺寸：(8, 32, 32)
+- 卷积核尺寸：(4, 8, 3, 3)
+- stride：1
+- padding：1
+- 输出尺寸：(4, 32, 32)
+- 运行时间步：35，监测输出
 
 检查：
 
 1. 在每个时间步的输出数据中，应有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
 
-#### 004 MaxPool2dSemiFolded
+#### 010 MaxPool2dSemiFolded
 
 测试设置：
 
@@ -200,7 +341,7 @@
 1. 在每个时间步的输出数据中，应有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
 
-#### 005 AvgPool2dSemiFolded
+#### 011 AvgPool2dSemiFolded
 
 测试设置：
 
@@ -219,7 +360,7 @@
 
 ### 多层
 
-#### 006 Conv2dSemiFoldedNet
+#### 012 Conv2dSemiFoldedNet
 
 测试设置：
 
@@ -236,7 +377,7 @@
 1. 在每个时间步的输出数据中，应有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
 
-#### 007 Conv2dSemiFoldedNet
+#### 013 Conv2dSemiFoldedNet
 
 测试设置：
 
@@ -253,7 +394,7 @@
 1. 在每个时间步的输出数据中，应有有效数据
 2. 有效输出数据应与参考输出在每个时间步相等（若生成新数据）
 
-#### 008 CNNSemiFoldedNet
+#### 014 CNNSemiFoldedNet
 
 测试设置：
 
diff --git a/tests/onboard/test_onboard.py b/tests/onboard/test_onboard.py
index f1cf28d7..d18f094f 100644
--- a/tests/onboard/test_onboard.py
+++ b/tests/onboard/test_onboard.py
@@ -885,13 +885,7 @@ class Net001(pb.DynSysGroup):
             def __init__(self, w1):
                 super().__init__()
                 self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
-                self.conv1 = pb.Conv2dSemiFolded(
-                    self.i1,
-                    w1,
-                    1,
-                    0,
-                    tick_wait_start=1,
-                )
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w1, 1, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
         TEST_NAME = self.test_Conv2dSemiFolded_001.__name__
@@ -963,20 +957,14 @@ def __init__(self, w1):
 
         print(f"Test {TEST_NAME} end")
 
+    # Compare with the test 002-005 series
+    # Regular (random) weights
     def test_Conv2dSemiFolded_002(self):
         class Net002(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
-                self.i1 = pb.InputProj(
-                    input=_out_bypass1, shape_out=shape1[:2]
-                )  # Changed input shape
-                self.conv1 = pb.Conv2dSemiFolded(
-                    self.i1,
-                    w2,
-                    2,  # Changed stride
-                    0,
-                    tick_wait_start=1,
-                )
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
         TEST_NAME = self.test_Conv2dSemiFolded_002.__name__
@@ -987,11 +975,11 @@ def __init__(self, w2):
 
         print(f"\nTest {TEST_NAME} start")
 
-        shape1 = (8, 64, 64)  # C*H*W
-        ksize = (4, shape1[0], 7, 7)  # O*C*K*k
-        out_shape = (4, 29, 29)
+        shape1 = (1, 8, 8)  # C*H*W
+        ksize = (1, 1, 2, 2)  # O*C*K*k
+        out_shape = (1, 4, 4)
 
-        sim_time = 65
+        sim_time = 16
 
         USE_EXISTING_DATA = False
         NPZ_FILE = TEST_CASE_DIR / "data.npz"
@@ -1001,13 +989,14 @@ def __init__(self, w2):
             inpdata1 = npz["inpdata1"]
             refresult1 = npz["refresult1"]
             print("Using the existing data file")
+            print("input:", inpdata1)
+            print("weight:", weight1)
             USE_EXISTING_DATA = True
         except:
             pass
 
         if not USE_EXISTING_DATA:
             print("Generating new data")
-            # W=8, disable weight bit optimization
             weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
             inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
             inpdata1 = np.concatenate(
@@ -1048,20 +1037,13 @@ def __init__(self, w2):
 
         print(f"Test {TEST_NAME} end")
 
+    # All weights are 1
     def test_Conv2dSemiFolded_003(self):
         class Net003(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
-                self.i1 = pb.InputProj(
-                    input=_out_bypass1, shape_out=shape1[:2]
-                )  # Changed input shape
-                self.conv1 = pb.Conv2dSemiFolded(
-                    self.i1,
-                    w2,
-                    1,  # Changed stride
-                    1,
-                    tick_wait_start=1,
-                )
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
         TEST_NAME = self.test_Conv2dSemiFolded_003.__name__
@@ -1072,9 +1054,88 @@ def __init__(self, w2):
 
         print(f"\nTest {TEST_NAME} start")
 
+        shape1 = (1, 8, 8)  # C*H*W
+        ksize = (1, shape1[0], 2, 2)  # O*C*K*k
+        out_shape = (1, 4, 4)
+
+        sim_time = 16
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            print("Input", inpdata1)
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # weight1 = FIXED_RNG.integers(0, 1, size=ksize, dtype=np.int8)
+            weight1 = np.ones(ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net003(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+            print(sim.data[probe][i].shape)
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    # Fan-in extension; all weights are 1
+    def test_Conv2dSemiFolded_004(self):
+        class Net004(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_004.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
         shape1 = (8, 64, 64)  # C*H*W
-        ksize = (4, shape1[0], 3, 3)  # O*C*K*k
-        out_shape = (4, 64, 64)
+        ksize = (4, shape1[0], 7, 7)  # O*C*K*k
+        out_shape = (4, 29, 29)
 
         sim_time = 65
 
@@ -1086,6 +1147,168 @@ def __init__(self, w2):
             inpdata1 = npz["inpdata1"]
             refresult1 = npz["refresult1"]
             print("Using the existing data file")
+            print("Input", inpdata1)
+            print("weight", weight1)
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # weight1 = FIXED_RNG.integers(0, 1, size=ksize, dtype=np.int8)
+            weight1 = np.ones(ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net004(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+            print(sim.data[probe][i].shape)
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    # Fan-in extension
+    def test_Conv2dSemiFolded_005(self):
+        class Net005(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_005.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (8, 64, 64)  # C*H*W
+        ksize = (4, shape1[0], 7, 7)  # O*C*K*k
+        out_shape = (4, 29, 29)
+
+        sim_time = 65
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            print("Input", inpdata1)
+            print("weight", weight1)
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            weight1 = FIXED_RNG.integers(0, 5, size=ksize, dtype=np.int8)
+            # weight1 = np.ones(ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net005(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+            print(sim.data[probe][i].shape)
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    # Compare tests 006-009
+    def test_Conv2dSemiFolded_006(self):
+        class Net006(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_006.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (1, 8, 8)  # C*H*W
+        ksize = (1, shape1[0], 3, 3)  # O*C*K*k
+        out_shape = (1, 8, 8)
+
+        sim_time = 10
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            print("Input:", inpdata1)
+            print("weight:", weight1)
             USE_EXISTING_DATA = True
         except:
             pass
@@ -1103,7 +1326,7 @@ def __init__(self, w2):
                 (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
             )
 
-        network = Net003(weight1)
+        network = Net006(weight1)
         conv2d = network.conv1
         generated = pb.DynSysGroup.build_fmodule(network)
         sim = pb.Simulator(network, start_time_zero=False)
@@ -1133,20 +1356,254 @@ def __init__(self, w2):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_MaxPool2dSemiFolded_004(self):
-        class Net004(pb.DynSysGroup):
+    def test_Conv2dSemiFolded_007(self):
+        class Net007(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_007.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (1, 8, 8)  # C*H*W
+        ksize = (1, shape1[0], 3, 3)  # O*C*K*k
+        out_shape = (1, 8, 8)
+
+        sim_time = 10
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            print("Input:", inpdata1)
+            print("weight:", weight1)
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # weight =1
+            # weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
+            weight1 = np.ones(ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net007(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_Conv2dSemiFolded_008(self):
+        class Net008(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_008.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (8, 32, 32)  # C*H*W
+        ksize = (4, shape1[0], 3, 3)  # O*C*K*k
+        out_shape = (4, 32, 32)
+
+        sim_time = 35
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            print("Input:", inpdata1)
+            print("weight:", weight1)
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # weight =1
+            # weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
+            weight1 = np.ones(ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net008(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_Conv2dSemiFolded_009(self):
+        class Net009(pb.DynSysGroup):
+            def __init__(self, w2):
+                super().__init__()
+                self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
+
+        USE_EXISTING_DATA = False
+        TEST_NAME = self.test_Conv2dSemiFolded_009.__name__
+        TEST_CASE_DIR = DATA_DIR / TEST_NAME
+        CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
+        if not TEST_CASE_DIR.exists():
+            TEST_CASE_DIR.mkdir()
+
+        print(f"\nTest {TEST_NAME} start")
+
+        shape1 = (8, 32, 32)  # C*H*W
+        ksize = (4, shape1[0], 3, 3)  # O*C*K*k
+        out_shape = (4, 32, 32)
+
+        sim_time = 35
+
+        USE_EXISTING_DATA = False
+        NPZ_FILE = TEST_CASE_DIR / "data.npz"
+        try:
+            npz = np.load(NPZ_FILE)
+            weight1 = npz["weight1"]
+            inpdata1 = npz["inpdata1"]
+            refresult1 = npz["refresult1"]
+            print("Using the existing data file")
+            USE_EXISTING_DATA = True
+        except:
+            pass
+
+        if not USE_EXISTING_DATA:
+            print("Generating new data")
+            # W=8, disable weight bit optimization
+            weight1 = FIXED_RNG.integers(-10, 10, size=ksize, dtype=np.int8)
+            inpa = FIXED_RNG.integers(0, 4, size=shape1, dtype=NEUOUT_U8_DTYPE)
+            inpdata1 = np.concatenate(
+                [inpa, np.zeros_like(inpa)], axis=2, dtype=inpa.dtype
+            )
+            # Shape of reference result is sim_time * refdata
+            refresult1 = np.zeros(
+                (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
+            )
+
+        network = Net009(weight1)
+        conv2d = network.conv1
+        generated = pb.DynSysGroup.build_fmodule(network)
+        sim = pb.Simulator(network, start_time_zero=False)
+        probe = pb.Probe(generated[conv2d][0], "output")
+        sim.add_probe(probe)
+        for i in range(sim_time):
+            pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
+            sim.run(1)
+
+            if not USE_EXISTING_DATA:
+                refresult1[i, :] = sim.data[probe][i]
+
+            print(f"t={i + 1}\n", sim.data[probe][i])
+
+        # Save weights & input data
+        if not USE_EXISTING_DATA:
+            np.savez(
+                NPZ_FILE, weight1=weight1, inpdata1=inpdata1, refresult1=refresult1
+            )
+
+        mapper = pb.Mapper()
+        mapper.build(network)
+        mapper.compile(weight_bit_optimization=False)
+        mapper.export(
+            fp=CONFIG_CASE_DIR, export_core_params=True, format="txt", use_hw_sim=True
+        )
+
+        print(f"Test {TEST_NAME} end")
+
+    def test_MaxPool2dSemiFolded_010(self):
+        class Net010(pb.DynSysGroup):
             def __init__(self, ksize):
                 super().__init__()
                 self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
-                self.maxpool1 = pb.MaxPool2dSemiFolded(
-                    self.i1,
-                    ksize,
-                    2,
-                    tick_wait_start=1,
+                self.pool1 = pb.MaxPool2dSemiFolded(
+                    self.i1, ksize, 2, tick_wait_start=1
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_MaxPool2dSemiFolded_004.__name__
+        TEST_NAME = self.test_MaxPool2dSemiFolded_010.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1184,11 +1641,11 @@ def __init__(self, ksize):
                 (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
             )
 
-        network = Net004(ksize)
-        maxpool = network.maxpool1
+        network = Net010(ksize)
+        pool = network.pool1
         generated = pb.DynSysGroup.build_fmodule(network)
         sim = pb.Simulator(network, start_time_zero=False)
-        probe = pb.Probe(generated[maxpool][0], "output")
+        probe = pb.Probe(generated[pool][0], "output")
         sim.add_probe(probe)
         for i in range(sim_time):
             pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
@@ -1212,21 +1669,17 @@ def __init__(self, ksize):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_AvgPool2dSemiFolded_005(self):
-        class Net005(pb.DynSysGroup):
+    def test_AvgPool2dSemiFolded_011(self):
+        class Net011(pb.DynSysGroup):
             def __init__(self, ksize):
                 super().__init__()
                 self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
-                self.maxpool1 = pb.AvgPool2dSemiFolded(
-                    self.i1,
-                    ksize,
-                    2,
-                    0,
-                    tick_wait_start=1,
+                self.pool1 = pb.AvgPool2dSemiFolded(
+                    self.i1, ksize, 2, 0, tick_wait_start=1
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_AvgPool2dSemiFolded_005.__name__
+        TEST_NAME = self.test_AvgPool2dSemiFolded_011.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1264,11 +1717,11 @@ def __init__(self, ksize):
                 (sim_time, out_shape[0] * out_shape[1]), dtype=NEUOUT_U8_DTYPE
             )
 
-        network = Net005(ksize)
-        maxpool = network.maxpool1
+        network = Net011(ksize)
+        pool = network.pool1
         generated = pb.DynSysGroup.build_fmodule(network)
         sim = pb.Simulator(network, start_time_zero=False)
-        probe = pb.Probe(generated[maxpool][0], "output")
+        probe = pb.Probe(generated[pool][0], "output")
         sim.add_probe(probe)
         for i in range(sim_time):
             pb.FRONTEND_ENV.save(data1=inpdata1[:, :, i])
@@ -1295,38 +1748,23 @@ def __init__(self, ksize):
     @pytest.mark.xfail(
         reason="A ValidationError will be raised due to the backend not support."
     )
-    def test_Conv2dSemiFoldedNet_006(self):
-        class Net006(pb.DynSysGroup):
+    def test_Conv2dSemiFoldedNet_012(self):
+        class Net012(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
                 super().__init__()
                 self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
-                self.conv1 = pb.Conv2dSemiFolded(
-                    self.i1,
-                    w1,
-                    1,
-                    1,
-                    tick_wait_start=1,
-                )
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w1, 1, 1, tick_wait_start=1)
 
                 self.conv2 = pb.Conv2dSemiFolded(
-                    self.conv1,
-                    w2,
-                    1,
-                    1,
-                    tick_wait_start=3,
+                    self.conv1, w2, 1, 1, tick_wait_start=3
                 )
 
                 self.linear1 = pb.LinearSemiFolded(
-                    self.conv2,
-                    out_shape[1],
-                    weights=w3,
-                    bias=2,
-                    conn_type=pb.SynConnType.All2All,
-                    tick_wait_start=5,
+                    self.conv2, out_shape[1], weights=w3, bias=2, tick_wait_start=5
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFoldedNet_006.__name__
+        TEST_NAME = self.test_Conv2dSemiFoldedNet_012.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1369,7 +1807,7 @@ def __init__(self, w1, w2, w3):
             # Shape of reference result is sim_time * refdata
             refresult1 = np.zeros((sim_time, out_shape[1]), dtype=NEUOUT_U8_DTYPE)
 
-        network = Net006(weight1, weight2, weight3)
+        network = Net012(weight1, weight2, weight3)
         conv2d1 = network.conv1
         conv2d2 = network.conv2
         linear = network.linear1
@@ -1411,38 +1849,21 @@ def __init__(self, w1, w2, w3):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_Conv2dSemiFoldedNet_007(self):
-        class Net007(pb.DynSysGroup):
+    def test_Conv2dSemiFoldedNet_013(self):
+        class Net013(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
                 super().__init__()
                 self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
-                self.conv1 = pb.Conv2dSemiFolded(
-                    self.i1,
-                    w1,
-                    2,
-                    1,
-                    tick_wait_start=1,
-                )
-
+                self.conv1 = pb.Conv2dSemiFolded(self.i1, w1, 2, 1, tick_wait_start=1)
                 self.conv2 = pb.Conv2dSemiFolded(
-                    self.conv1,
-                    w2,
-                    2,
-                    1,
-                    tick_wait_start=3,
+                    self.conv1, w2, 2, 1, tick_wait_start=3
                 )
-
                 self.linear1 = pb.LinearSemiFolded(
-                    self.conv2,
-                    out_shape[1],
-                    weights=w3,
-                    bias=2,
-                    conn_type=pb.SynConnType.All2All,
-                    tick_wait_start=5,
+                    self.conv2, out_shape[1], weights=w3, bias=2, tick_wait_start=5
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFoldedNet_007.__name__
+        TEST_NAME = self.test_Conv2dSemiFoldedNet_013.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1484,7 +1905,7 @@ def __init__(self, w1, w2, w3):
             # Shape of reference result is sim_time * refdata
             refresult1 = np.zeros((sim_time, out_shape[1]), dtype=NEUOUT_U8_DTYPE)
 
-        network = Net007(weight1, weight2, weight3)
+        network = Net013(weight1, weight2, weight3)
         conv2d1 = network.conv1
         conv2d2 = network.conv2
         linear = network.linear1
@@ -1526,11 +1947,10 @@ def __init__(self, w1, w2, w3):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_CNNSemiFoldedNet_008(self):
-        class Net008(pb.DynSysGroup):
+    def test_CNNSemiFoldedNet_014(self):
+        class Net014(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
                 super().__init__()
-
                 self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape1[:2])
 
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w1, 1, 1, tick_wait_start=1)
@@ -1544,16 +1964,11 @@ def __init__(self, w1, w2, w3):
                     self.conv2, (2, 2), 2, tick_wait_start=7
                 )
                 self.linear1 = pb.LinearSemiFolded(
-                    self.pool2,
-                    out_shape[1],
-                    weights=w3,
-                    bias=2,
-                    conn_type=pb.SynConnType.All2All,
-                    tick_wait_start=9,
+                    self.pool2, out_shape[1], weights=w3, bias=2, tick_wait_start=9
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_CNNSemiFoldedNet_008.__name__
+        TEST_NAME = self.test_CNNSemiFoldedNet_014.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1596,7 +2011,7 @@ def __init__(self, w1, w2, w3):
             # Shape of reference result is sim_time * refdata
             refresult1 = np.zeros((sim_time, out_shape[1]), dtype=NEUOUT_U8_DTYPE)
 
-        network = Net008(weight1, weight2, weight3)
+        network = Net014(weight1, weight2, weight3)
         conv2d1 = network.conv1
         conv2d2 = network.conv2
         linear = network.linear1

From d4ef9386aff970e5d434f76b82d20d30fc74b417 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 30 Oct 2024 09:42:22 +0800
Subject: [PATCH 114/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix:=20fix=20exportin?=
 =?UTF-8?q?g=20when=20using=20json?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_exporting.py | 18 +++++----
 paibox/backend/conf_types.py     | 18 ++++++---
 paibox/backend/graphs.py         | 63 --------------------------------
 tests/backend/__init__.py        | 10 +++++
 4 files changed, 34 insertions(+), 75 deletions(-)

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
index 347c171b..7563eeef 100644
--- a/paibox/backend/conf_exporting.py
+++ b/paibox/backend/conf_exporting.py
@@ -256,7 +256,7 @@ def export_output_conf_json(output_conf_info: OutputDestConf, fp: Path) -> None:
             )
     else:
         with open(_full_fp, "w") as f:
-            json.dump(output_conf_info, f, indent=2, cls=PAIConfigJsonEncoder)
+            json.dump(_valid_conf, f, indent=2, cls=PAIConfigJsonEncoder)
 
 
 if _USE_ORJSON:
@@ -317,18 +317,22 @@ def export_aux_gh_info(gh_info: GraphInfo, fp: Path, export_clk_en_L2: bool) ->
         # Export the serial port data of the L2 cluster clocks
         if export_clk_en_L2 and (clk_en_L2_dict := misc.get("clk_en_L2")):
             # dict[ChipCoord, list[int]]
-            aux_gh_info_dict["misc"]["clk_en_L2"] = {
-                str(k): v for k, v in clk_en_L2_dict.items()
-            }
+            aux_gh_info_dict["misc"]["clk_en_L2"] = clk_en_L2_dict
         if lst := misc.get("target_chip_list"):  # list of ChipCoord
-            aux_gh_info_dict["misc"]["target_chip_list"] = [str(i) for i in lst]
+            aux_gh_info_dict["misc"]["target_chip_list"] = lst
 
     if _USE_ORJSON:
         with open(_full_fp, "wb") as f:
-            f.write(orjson.dumps(aux_gh_info_dict, option=orjson.OPT_INDENT_2))
+            f.write(
+                orjson.dumps(
+                    aux_gh_info_dict,
+                    default=PAIConfigJsonDefault,
+                    option=orjson.OPT_INDENT_2,
+                )
+            )
     else:
         with open(_full_fp, "w") as f:
-            json.dump(aux_gh_info_dict, f, indent=2)
+            json.dump(aux_gh_info_dict, f, indent=2, cls=PAIConfigJsonEncoder)
 
 
 def export_graph_info(
diff --git a/paibox/backend/conf_types.py b/paibox/backend/conf_types.py
index a8c5f7f6..0f442d18 100644
--- a/paibox/backend/conf_types.py
+++ b/paibox/backend/conf_types.py
@@ -40,9 +40,13 @@
 
     _USE_ORJSON = True
 
-    def PAIConfigJsonDefault(o: Any):
-        if isinstance(o, Coord):
-            return o.address
+    def PAIConfigJsonDefault(o: Any) -> Any:
+        if isinstance(o, (list, tuple)):
+            return [PAIConfigJsonDefault(i) for i in o]
+        elif isinstance(o, dict):
+            return {str(k): PAIConfigJsonDefault(v) for k, v in o.items()}
+        elif isinstance(o, Coord):
+            return str(o)
         elif isinstance(o, NeuronAttrs):
             return o.model_dump(by_alias=True)
         elif isinstance(o, NeuronDestInfo):
@@ -57,8 +61,12 @@ def PAIConfigJsonDefault(o: Any):
 
     class PAIConfigJsonEncoder(json.JSONEncoder):
         def default(self, o: Any) -> Any:
-            if isinstance(o, Coord):
-                return o.address
+            if isinstance(o, (list, tuple)):
+                return [self.default(i) for i in o]
+            elif isinstance(o, dict):
+                return {str(k): self.default(v) for k, v in o.items()}
+            elif isinstance(o, Coord):
+                return str(o)
             elif isinstance(o, Enum):
                 return o.value
             elif isinstance(o, np.ndarray):
diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index cca7d0b0..b0c1950d 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -535,69 +535,6 @@ def _degree_check(
                 )
 
 
-def convert2routing_groups(
-    succ_dg_of_cb: dict[CoreBlock, list[CoreBlock]],
-    degrees_of_cb: dict[CoreBlock, NodeDegree],
-    input_core_blocks: dict[SourceNodeType, list[CoreBlock]],
-) -> tuple[list[RoutingGroup], dict[RoutingGroup, list[RoutingGroup]]]:
-    ordered_core_blocks = toposort(succ_dg_of_cb)
-    seen_cb = set()
-    routing_groups: list[RoutingGroup] = []
-    succ_cb_gid_dict = defaultdict(list)
-
-    # After that, all input core blocks have been traversed.
-    for input_cbs in input_core_blocks.values():
-        # FIXME Temporary solution. This case should be solved first:
-        # I1 -> A/B, I2 -> B/C.
-        if not seen_cb.isdisjoint(input_cbs):
-            if len(input_cbs) > 1:
-                raise ValueError
-            else:
-                seen_cb.update(input_cbs)
-                routing_groups.append(RoutingGroup(*input_cbs))
-
-    for cb in ordered_core_blocks:
-        # Check whether the core block has been traversed. This judgment condition is for
-        # core blocks with out-degree = 1 & output core blocks (out-degree = 0).
-        if cb not in seen_cb:
-            seen_cb.add(cb)
-            routing_groups.append(RoutingGroup(cb))
-
-        # If out-degree > 1, group successor core blocks according to their routing id.
-        if degrees_of_cb[cb].out_degree > 1:
-            succ_cbs = succ_dg_of_cb[cb]
-
-            succ_cb_gid_dict.clear()
-            for succ_cb in succ_cbs:
-                if succ_cb in seen_cb:
-                    continue
-                if succ_cb._routing_id in succ_cb_gid_dict:
-                    succ_cb_gid_dict[succ_cb._routing_id].append(succ_cb)
-                else:
-                    succ_cb_gid_dict[succ_cb._routing_id] = [succ_cb]
-
-            for v in succ_cb_gid_dict.values():
-                routing_groups.append(RoutingGroup(*v))
-
-            seen_cb.update(succ_cbs)
-
-    routing_groups_succ: dict[RoutingGroup, list[RoutingGroup]] = defaultdict(list)
-
-    for rg in routing_groups:
-        routing_groups_succ[rg] = []
-        rg_succ_cb: set[CoreBlock] = set()
-        for cb in rg:
-            rg_succ_cb.update(succ_dg_of_cb[cb])
-
-        for _rg in routing_groups:
-            for cb in rg_succ_cb:
-                if cb in _rg:
-                    routing_groups_succ[rg].append(_rg)
-                    break
-
-    return routing_groups, routing_groups_succ
-
-
 def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
     """
     Topological sort algorithm by Kahn [1]_.
diff --git a/tests/backend/__init__.py b/tests/backend/__init__.py
index e69de29b..a6086af0 100644
--- a/tests/backend/__init__.py
+++ b/tests/backend/__init__.py
@@ -0,0 +1,10 @@
+# The backend will use orjson if available.
+try:
+    import orjson
+
+    JSON_BACKEND = "orjson"
+
+except ModuleNotFoundError:
+    JSON_BACKEND = "json"
+
+print(f"Use {JSON_BACKEND} for json encoding.")

From 24ebf51627f005aca3ce617c0d26ab489b1bb82c Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 30 Oct 2024 09:44:17 +0800
Subject: [PATCH 115/187] =?UTF-8?q?=F0=9F=97=91=EF=B8=8F=20refactor(backen?=
 =?UTF-8?q?d):=20remove=20property=20`routing=5Fid`=20from=20`CoreBlock`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/placement.py | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index f092b6d4..99a99e51 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -46,7 +46,6 @@
 class CoreBlock(CoreAbstract):
 
     _parents: tuple[FullConnectedSyn, ...]
-    _routing_id: int  # TODO will be deprecated
     seed: int
     """Random seed, legal integer, no more than uint64."""
     _lcn_ex: LCN_EX
@@ -70,23 +69,19 @@ class CoreBlock(CoreAbstract):
     def __init__(
         self,
         *parents: FullConnectedSyn,
-        routing_id: int,
         seed: int,
-        mode: CoreMode = CoreMode.MODE_SNN,
+        mode: CoreMode,
         name: Optional[str] = None,
     ) -> None:
         """Core blocks in SNN mode.
 
         Args:
-            - parents: the parent synapses.
-            - routing_id: id of routing group.
-            - seed: random seed. Default value is 0.
-            - mode: runtime mode of the core block. Default value is `MODE_SNN`.
-            - name: name of the core block. Optional.
+            parents: the parent synapses.
+            seed: random seed. Default value is 0.
+            mode: runtime mode of the core block.
         """
         super().__init__(name)
         self._parents = parents
-        self._routing_id = routing_id
         self.rt_mode = mode
         self.seed = seed
         self._lcn_ex = self._n_axon2lcn_ex()
@@ -376,13 +371,7 @@ def _obj_repr(self) -> str:
         return ", ".join(n.name for n in self.obj)
 
     @classmethod
-    def build(
-        cls,
-        *synapses: FullConnectedSyn,
-        routing_id: int,
-        rt_mode: CoreMode,
-        seed: int = 0,
-    ):
+    def build(cls, *synapses: FullConnectedSyn, rt_mode: CoreMode, seed: int = 0):
         """Group synapses & build `CoreBlock`."""
         if seed > (1 << 64) - 1:
             warnings.warn(
@@ -390,7 +379,7 @@ def build(
                 TruncationWarning,
             )
 
-        return cls(*synapses, routing_id=routing_id, mode=rt_mode, seed=seed)
+        return cls(*synapses, mode=rt_mode, seed=seed)
 
     @classmethod
     def build_core_blocks(cls, route_group: MergedSuccGroup) -> list["CoreBlock"]:
@@ -410,7 +399,7 @@ def build_core_blocks(cls, route_group: MergedSuccGroup) -> list["CoreBlock"]:
             for i in idx:
                 succ_edges.update(route_group.outputs[succ_nodes[i]])
 
-            core_block = CoreBlock.build(*succ_edges, routing_id=0, rt_mode=mode)
+            core_block = CoreBlock.build(*succ_edges, rt_mode=mode)
             core_blocks.append(core_block)
 
         return core_blocks

From b1efdce8bddfeef32a5c36e0af3b734a810da89e Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 30 Oct 2024 11:41:07 +0800
Subject: [PATCH 116/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(backend):?=
 =?UTF-8?q?=20refactor=20node=20constraints?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/constrs.py     | 87 ++++++++++++++++-------------------
 paibox/backend/placement.py   | 17 ++++---
 tests/backend/test_constrs.py | 39 +++++++++++-----
 3 files changed, 74 insertions(+), 69 deletions(-)

diff --git a/paibox/backend/constrs.py b/paibox/backend/constrs.py
index 0e9be86a..51bdc1dc 100644
--- a/paibox/backend/constrs.py
+++ b/paibox/backend/constrs.py
@@ -1,16 +1,16 @@
 import sys
 from collections import defaultdict
-from collections.abc import Sequence
 from typing import ClassVar
 
-from .types import NodeName, NodeType
+from .types import NodeType
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias
 else:
     from typing_extensions import TypeAlias
 
-BoundedConstrType: TypeAlias = list[frozenset[NodeName]]
+NodeIdx: TypeAlias = int
+NodeConstrsAttr: TypeAlias = str
 
 
 class Constraints:
@@ -18,57 +18,48 @@ class Constraints:
 
 
 class GraphNodeConstrs(Constraints):
-    BOUNDED_CONSTRS: ClassVar[list[list[NodeName]]] = []
-    CONFLICTED_CONSTRS: ClassVar[dict[NodeName, tuple[NodeName, ...]]] = defaultdict(
-        tuple
-    )
+    node_constr_attrs: ClassVar[list[NodeConstrsAttr]] = [
+        "pool_max",
+        "tick_wait_start",
+        "tick_wait_end",
+    ]
+    """Node attributes that are actually the parameters of the cores."""
 
     @classmethod
-    def clear(cls) -> None:
-        cls.BOUNDED_CONSTRS = []
-        cls.CONFLICTED_CONSTRS = {}
+    def set_constr_attr(cls, attr: NodeConstrsAttr) -> None:
+        if attr not in cls.node_constr_attrs:
+            cls.node_constr_attrs.append(attr)
 
     @classmethod
-    def add_node_constr(
-        cls,
-        *,
-        bounded: Sequence[NodeName] = (),
-        conflicted: dict[NodeName, Sequence[NodeName]] = {},
-    ):
-        """Add constraints to a node."""
-        if len(bounded) > 0:
-            cls.BOUNDED_CONSTRS.append(list(bounded))
-
-        if conflicted:
-            for k, v in conflicted.items():
-                cls.CONFLICTED_CONSTRS[k] = tuple(v)
+    def remove_constr_attr(cls, attr: NodeConstrsAttr, strict: bool = False) -> None:
+        if attr in cls.node_constr_attrs:
+            cls.node_constr_attrs.remove(attr)
+        elif strict:
+            raise ValueError(
+                f"attribute {attr} not found in constraint attributes list."
+            )
 
     @staticmethod
-    def tick_wait_attr_constr(raw_nodes: list[NodeType]) -> list[list[int]]:
-        """Check whether the neurons to be assigned to a group are "equal" after\
-            automatic inference.
+    def apply_constrs(raw_nodes: list[NodeType]) -> list[list[NodeIdx]]:
+        """Group the nodes by the constraints of the nodes.
 
-        NOTE: Check attributes `tick_wait_start` & `tick_wait_end`. For those   \
-            neurons with different attributes, they need to be separated.
+        Args:
+            raw_nodes: nodes that need to be grouped using core parameter constraints.
 
-        Return: returen the group of indices.
+        Returns:
+            a list of groups of node indices.
         """
-        tw_attrs = [
-            (raw_node.tick_wait_start, raw_node.tick_wait_end) for raw_node in raw_nodes
-        ]
-
-        if len(tw_attrs_set := set(tw_attrs)) == 1:
-            return []
-        else:
-            constr = []
-            pos = []
-            for attr in tw_attrs_set:
-                pos.clear()
-                # Find all positions
-                for i, v in enumerate(tw_attrs):
-                    if attr == v:
-                        pos.append(i)
-
-                constr.append(pos.copy())
-
-            return constr
+        grouped_indices = defaultdict(list)
+
+        for i, node in enumerate(raw_nodes):
+            key_lst = []
+            for attr in GraphNodeConstrs.node_constr_attrs:
+                if (v := getattr(node, attr, None)) is None:
+                    raise AttributeError(f"node {node.name} has no attribute {attr}.")
+
+                key_lst.append(v)
+
+            k = tuple(key_lst)
+            grouped_indices[k].append(i)
+
+        return list(grouped_indices.values())
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 99a99e51..accfcccd 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -382,22 +382,21 @@ def build(cls, *synapses: FullConnectedSyn, rt_mode: CoreMode, seed: int = 0):
         return cls(*synapses, mode=rt_mode, seed=seed)
 
     @classmethod
-    def build_core_blocks(cls, route_group: MergedSuccGroup) -> list["CoreBlock"]:
+    def build_core_blocks(cls, msgrp: MergedSuccGroup) -> list["CoreBlock"]:
         core_blocks: list[CoreBlock] = []
-        succ_nodes = list(route_group.nodes)
+        succ_nodes = list(msgrp.nodes)
+        # TODO Currently the runtime mode is not taken into account for grouping constraints,
+        # because in general, a network can only have one mode.
         mode = succ_nodes[0].mode
         if any(node.mode != mode for node in succ_nodes):
             raise NotSupportedError("mixed mode is not supported.")
 
-        # TODO More constraints for nodes can be called here.
-        idx_of_sg = GraphNodeConstrs.tick_wait_attr_constr(succ_nodes)
-        if len(idx_of_sg) == 0:
-            idx_of_sg = [list(range(len(succ_nodes)))]
+        idx_of_sg = GraphNodeConstrs.apply_constrs(succ_nodes)
 
-        for idx in idx_of_sg:
+        for idx_lst in idx_of_sg:
             succ_edges: set[EdgeType] = set()
-            for i in idx:
-                succ_edges.update(route_group.outputs[succ_nodes[i]])
+            for i in idx_lst:
+                succ_edges.update(msgrp.outputs[succ_nodes[i]])
 
             core_block = CoreBlock.build(*succ_edges, rt_mode=mode)
             core_blocks.append(core_block)
diff --git a/tests/backend/test_constrs.py b/tests/backend/test_constrs.py
index e6a29d8d..24d3ed8d 100644
--- a/tests/backend/test_constrs.py
+++ b/tests/backend/test_constrs.py
@@ -5,15 +5,6 @@
 
 
 class TestGraphNodeConstrs:
-    @pytest.mark.skip("Not implemented")
-    def test_add_node_constr(self):
-        constr = GraphNodeConstrs()
-        constr.add_node_constr(bounded=["1", "2", "4"])
-
-        constr.add_node_constr(conflicted={"4": {"1"}})
-
-        assert 1
-
     def test_tick_wait_attr_constr(self):
         n1 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=0, name="n1")
         n2 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=2, name="n2")
@@ -22,8 +13,32 @@ def test_tick_wait_attr_constr(self):
         n5 = pb.LIF(10, 3, tick_wait_start=2, tick_wait_end=0, name="n5")
         n6 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=3, name="n6")
 
-        constr = GraphNodeConstrs.tick_wait_attr_constr([n1, n2, n3, n4, n5, n6])
+        constr = GraphNodeConstrs.apply_constrs([n1, n2, n3, n4, n5, n6])
         assert len(constr) == 4
 
-        constr = GraphNodeConstrs.tick_wait_attr_constr([n4, n5])
-        assert len(constr) == 0
+        constr = GraphNodeConstrs.apply_constrs([n4, n5])
+        assert len(constr) == 1
+
+    def test_apply_constraints(self):
+        n1 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=0, name="n1")
+        n2 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=2, name="n2")
+        n3 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=3, name="n3")
+        n4 = pb.LIF(10, 3, tick_wait_start=2, tick_wait_end=0, name="n4")
+        n5 = pb.LIF(10, 3, tick_wait_start=2, tick_wait_end=0, name="n5")
+        n6 = pb.LIF(10, 3, tick_wait_start=1, tick_wait_end=3, name="n6")
+        n7 = pb.ANNNeuron(10, 0, tick_wait_start=1, tick_wait_end=0, name="n7")
+        n8 = pb.ANNNeuron(
+            10, 0, tick_wait_start=1, tick_wait_end=0, pool_max=True, name="n8"
+        )
+        n9 = pb.ANNNeuron(
+            10, 0, tick_wait_start=1, tick_wait_end=2, pool_max=True, name="n9"
+        )
+        n10 = pb.ANNNeuron(
+            20, 1, tick_wait_start=1, tick_wait_end=2, pool_max=True, name="n10"
+        )
+
+        constr = GraphNodeConstrs.apply_constrs(
+            [n1, n2, n3, n4, n5, n6, n7, n8, n9, n10]
+        )
+
+        assert len(constr) == 6

From 36e12c55b108badf1a66e290d5f13554380a758d Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 30 Oct 2024 11:44:51 +0800
Subject: [PATCH 117/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix:=20fixed=20the=20?=
 =?UTF-8?q?issue=20that=20the=20`pool=5Fmax`=20attribute=20of=20`MaxPool2d?=
 =?UTF-8?q?SemiFolded`=20layer=20was=20not=20considered=20during=20compila?=
 =?UTF-8?q?tion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/placement.py | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index accfcccd..0c3a4380 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -234,23 +234,36 @@ def dendrite_comb_rate(self) -> int:
     @property
     def tws(self) -> int:
         """Attribute `tick_wait_start`."""
-        if not check_attr_same(self.dest, "tick_wait_start"):
+        _check_attr = "tick_wait_start"
+        if not check_attr_same(self.dest, _check_attr):
             raise AttributeError(
-                "attribute 'tick_wait_start' of the core block are not equal."
+                f"attribute '{_check_attr}' of the core block are not equal."
             )
 
         return self.dest[0].tick_wait_start
 
     @property
     def twe(self) -> int:
-        """Attribute `tick_wait_end.`"""
-        if not check_attr_same(self.dest, "tick_wait_end"):
+        """Attribute `tick_wait_end`."""
+        _check_attr = "tick_wait_end"
+        if not check_attr_same(self.dest, _check_attr):
             raise AttributeError(
-                "attribute 'tick_wait_end' of the core block are not equal."
+                f"attribute '{_check_attr}' of the core block are not equal."
             )
 
         return self.dest[0].tick_wait_end
 
+    @property
+    def pool_max(self) -> MaxPoolingEnable:
+        """Attribute `pool_max`."""
+        _check_attr = "pool_max"
+        if not check_attr_same(self.dest, _check_attr):
+            raise AttributeError(
+                f"attribute '{_check_attr}' of the core block are not equal."
+            )
+
+        return self.dest[0].pool_max
+
     @property
     def n_axon(self) -> int:
         return sum(s.num_out for s in self.axons)
@@ -692,7 +705,7 @@ def export_param_config(self) -> CoreConfig:
             _mode_params[0],                    # input_width_format
             _mode_params[1],                    # spike_width_format
             self.n_working_dendrite,            # num_dendrite
-            MaxPoolingEnable.DISABLE,           # max_pooling_en
+            self.pool_max,                      # max_pooling_en
             self.tws,                           # tick_wait_start
             self.twe,                           # tick_wait_end
             _mode_params[2],                    # snn_mode_en
@@ -814,6 +827,10 @@ def tws(self) -> int:
     def twe(self) -> int:
         return self.parent.twe
 
+    @property
+    def pool_max(self) -> MaxPoolingEnable:
+        return self.parent.pool_max
+
     @property
     def n_working_dendrite(self) -> int:
         """The number of actual working dendrites.

From 24775c80448f039823ffda3db678a6876cb4a511 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 4 Nov 2024 17:04:44 +0800
Subject: [PATCH 118/187] =?UTF-8?q?=F0=9F=94=A8=20opt(wram):=20update=20lo?=
 =?UTF-8?q?gic?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/placement.py | 55 +++++++++++++++++++++----------------
 paibox/backend/types.py     |  2 +-
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 0c3a4380..08da348e 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -43,6 +43,16 @@
 )
 
 
+# Get the fan-out by the combination rate of dendrites
+if hasattr(HwConfig, "FANOUT_IW8"):
+    FANOUT_IW8 = HwConfig.FANOUT_IW8
+else:
+    FANOUT_IW8 = [HwConfig.N_NEURON_MAX_ANN, 1364, 876, 512, 256, 128, 64, 32, 16, 8]
+
+
+NEURON_PARAMS_BIT_LENGTH = 214  # A constant of frame definition
+
+
 class CoreBlock(CoreAbstract):
 
     _parents: tuple[FullConnectedSyn, ...]
@@ -553,7 +563,7 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
                     w_folded_3d[c * self.n_timeslot + lcn, :, :],
                     axis=1,
                     count=self.n_weight_bits,
-                    bitorder="little",
+                    bitorder=HwConfig.WEIGHT_BITORDER,
                 )
 
                 for bit in range(self.n_weight_bits):
@@ -586,21 +596,19 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
     def _nfold_weight(
         raw_weight: WeightType, expected_row: int, n_fold: int
     ) -> WeightType:
-        """According to the folding ratio `n_fold`, fold the weight matrix.
+        """Fold the weight matrix according to the folding ratio.
 
         Args:
-            - raw_weight: the raw weight matrix.
-            - expected_row: expected #N of row.
-            - n_fold: the folding ratio.
+            raw_weight: the raw weight matrix.
+            expected_row: the expected #N of row.
+            n_fold: the folding ratio (1 << LCN).
         """
         raw_row, raw_col = raw_weight.shape
+        n_row_folded, r = divmod(raw_row, n_fold)  # #N of rows after folding
 
-        if (r := raw_row % n_fold) > 0:
-            _raw_weight = np.append(
-                raw_weight,
-                np.zeros((n_fold - r, raw_col), dtype=WEIGHT_DTYPE),
-                axis=0,
-            )
+        if r > 0:
+            n_row_folded += 1
+            _raw_weight = np.pad(raw_weight, ((0, n_fold - r), (0, 0)))
         else:
             _raw_weight = raw_weight
 
@@ -609,15 +617,17 @@ def _nfold_weight(
 
         for i, j in np.ndindex((n_fold, raw_col)):
             w_col = w_splited[i][:, j]
-            w_folded[:, n_fold * j + i] = w_col
+            w_folded[:n_row_folded, j * n_fold + i] = w_col
 
         return w_folded
 
     @staticmethod
     def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
-        """Convert the unpacked weights into a mapping format, corresponding to the WRAM address, each address contains \
+        """Convert the unpacked weights into a mapping form, corresponding to the WRAM address. Each address contains \
             18 uint64.
             (1152, x) -> (x, 1152) -> (x*18, 64) -> (x*18, 8) uint8 -> (x*18, 1) uint64 -> (x, 18) uint64.
+            
+            TODO simpler (1152, x) -> (x, 1152) -> pack -> (x, 144) uint8 -> (x, 18) uint64. (x <= 512)
         """
         # Reshape to 64 columns to avoid contiguous problem.
         w_unpacked_aligned = w_unpacked.T.reshape((-1, N_BIT_PACKED_WEIGHT))
@@ -629,6 +639,11 @@ def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
         w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape(
             (w_unpacked.shape[1], -1)
         )
+        # TODO If everything is okay, use the simpler method as follows:
+        # w_packed_u8 = np.packbits(
+        #     w_unpacked.T, axis=1, bitorder=HwConfig.WEIGHT_BITORDER
+        # )
+        # w_packed_u64 = np.ascontiguousarray(w_packed_u8).view(WRAM_PACKED_DTYPE)
         w_packed_u64.setflags(write=False)
 
         return w_packed_u64
@@ -666,7 +681,9 @@ def neu_params_mapping(neu_confs: list[NeuronConfig]) -> WRAMPackedType:
             for i in range(neu_conf.neu_seg.n_neuron):
                 params = frame3.packages[i * 4 : (i + 1) * 4]
                 neu_conf_params[i, :] = np.unpackbits(
-                    params.view(WRAM_UNPACKED_DTYPE), axis=0, bitorder="little"
+                    params.view(WRAM_UNPACKED_DTYPE),
+                    axis=0,
+                    bitorder=HwConfig.WEIGHT_BITORDER,
                 )[:NEURON_PARAMS_BIT_LENGTH]
 
             neu_conf_params_list.append(neu_conf_params)
@@ -910,13 +927,3 @@ def n_core_required(self) -> int:
 def max_lcn_of_cb(cb: list[CoreBlock]) -> LCN_EX:
     """Find the max LCN extenion of previous grouped synapses"""
     return max(cb, key=lambda cb: cb.lcn_ex).lcn_ex
-
-
-# Get the fan-out by the combination rate of dendrites
-if hasattr(HwConfig, "FANOUT_IW8"):
-    FANOUT_IW8 = HwConfig.FANOUT_IW8  # type: ignore
-else:
-    FANOUT_IW8 = [HwConfig.N_NEURON_MAX_ANN, 1364, 876, 512, 256, 128, 64, 32, 16, 8]
-
-
-NEURON_PARAMS_BIT_LENGTH = 214
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 2463cf72..ff7984ed 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -55,7 +55,7 @@
 WRAMUnpackedType: TypeAlias = NDArray[WRAM_UNPACKED_DTYPE]
 # Type of packed weight in WRAM
 WRAMPackedType: TypeAlias = NDArray[WRAM_PACKED_DTYPE]
-N_BIT_PACKED_WEIGHT = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bits of packed weight
+N_BIT_PACKED_WEIGHT = np.iinfo(WRAM_PACKED_DTYPE).bits
 
 _COORD_UNSET = Coord(0, 0)
 _RID_UNSET = RId(0, 0)

From b3e9e04756162920c66ab4eb635461dd6bde71d6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 4 Nov 2024 17:03:03 +0000
Subject: [PATCH 119/187] :arrow_up: auto update by pre-commit hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 24.8.0 → 24.10.0](https://github.com/psf/black/compare/24.8.0...24.10.0)
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 126df612..490b6d15 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
         stages: [pre-commit]
 
   - repo: https://github.com/psf/black
-    rev: 24.8.0
+    rev: 24.10.0
     hooks:
       - id: black
         stages: [pre-commit]

From 9853b6b68826f56234152d786c11aac55e662073 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 4 Nov 2024 17:06:13 +0800
Subject: [PATCH 120/187] =?UTF-8?q?=E2=9C=85=20test(wram):=20add=20check?=
 =?UTF-8?q?=20for=20iw8=20wram=20mapping=20tests,=20optimize=20test=20case?=
 =?UTF-8?q?s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_placement.py | 192 ++++++++++++++------------------
 1 file changed, 86 insertions(+), 106 deletions(-)

diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 1d0c88e7..ea8aedbb 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -12,7 +12,7 @@
 from paicorelib.framelib import OfflineFrameGen
 
 import paibox as pb
-from paibox.backend.placement import CorePlacement
+from paibox.backend.placement import CorePlacement, FANOUT_IW8
 from paibox.backend.types import (
     WRAM_PACKED_DTYPE,
     WRAM_UNPACKED_DTYPE,
@@ -25,13 +25,28 @@
 
 from .test_conf_exporting import _gen_random_neuron_dest_info
 
+if hasattr(HwConfig, "WEIGHT_BITORDER"):
+    W_BITORDER = HwConfig.WEIGHT_BITORDER
+else:
+    W_BITORDER = "little"
+
+N_BIT_PACKED_WEIGHT = np.iinfo(WRAM_PACKED_DTYPE).bits
+if hasattr(CorePlacement, "WRAM_BASE_SHAPE"):
+    WRAM_BASE_SHAPE = CorePlacement.WRAM_BASE_SHAPE
+else:
+    WRAM_BASE_SHAPE = (HwConfig.ADDR_AXON_MAX + 1, HwConfig.ADDR_RAM_MAX + 1)
+
+
+NEURON_PARAMS_BIT_LENGTH = 214
+N_NEURON_PARAM_IN_COL = HwConfig.N_FANIN_PER_DENDRITE_MAX // NEURON_PARAMS_BIT_LENGTH
+
 
 def _packbits_ref(bits: np.ndarray, count: Optional[int] = None) -> int:
     """Pack unsigned bits (from LSB to MSB) into a signed integer.
 
     Args:
-        - bits: an array of bits from LSB to MSB(sign bit).
-        - count: `bits` is an N-bit signed integer. If not provided, it is  \
+        bits (np.ndarray): an array of bits from LSB to MSB(sign bit).
+        count (int, optional): `bits` is an N-bit signed integer. If not provided, it is  \
             assumed to be the same as `bits.size`.
     """
     if count is None:
@@ -92,52 +107,6 @@ def test_get_raw_weight(fixed_rng: np.random.Generator):
         w_of_neu_segs_of_cb.append(w_of_neu_segs)
 
 
-def test_nfold_weight():
-    """A prototype function of `_nfold_weight` to test the weight folding."""
-    original_matrix = np.arange(1, 25, dtype=WEIGHT_DTYPE).reshape(8, 3)
-    nfold = 3
-
-    if (r := original_matrix.shape[0] % nfold) > 0:
-        w_padding = np.append(
-            original_matrix,
-            values=np.zeros((nfold - r, original_matrix.shape[1]), dtype=WEIGHT_DTYPE),
-            axis=0,
-        )
-    else:
-        w_padding = original_matrix
-
-    split = np.vsplit(w_padding, nfold)
-
-    expected_row = w_padding.shape[0] // nfold
-    result = np.zeros(
-        (expected_row, original_matrix.shape[1] * nfold),
-        dtype=WEIGHT_DTYPE,
-    )
-
-    for i, j in np.ndindex((nfold, original_matrix.shape[1])):
-        g = split[i][:, j]
-        result[:, j * nfold + i] = g
-
-    assert np.array_equal(
-        result,
-        np.array(
-            [
-                [1, 10, 19, 2, 11, 20, 3, 12, 21],
-                [4, 13, 22, 5, 14, 23, 6, 15, 24],
-                [7, 16, 0, 8, 17, 0, 9, 18, 0],
-            ],
-            dtype=WEIGHT_DTYPE,
-        ),
-    )
-
-
-N_BIT_PACKED_WEIGHT = WRAM_PACKED_DTYPE(1).nbytes * 8  # #N bits of packed weight
-if hasattr(CorePlacement, "WRAM_BASE_SHAPE"):
-    WRAM_BASE_SHAPE = CorePlacement.WRAM_BASE_SHAPE
-else:
-    WRAM_BASE_SHAPE = (HwConfig.ADDR_AXON_MAX + 1, HwConfig.ADDR_RAM_MAX + 1)
-
-
 def _get_max_fanout(iw: int, dendr_comb_rate: int) -> int:
     if iw == 1:
         return HwConfig.N_DENDRITE_MAX_SNN >> dendr_comb_rate
@@ -164,11 +133,11 @@ def test_signed_unpackbits(self):
 
             for actual_signed in actual_array:
                 unpacked = np.unpackbits(
-                    np.uint8(actual_signed), axis=0, count=nbit, bitorder="little"
+                    np.uint8(actual_signed), axis=0, count=nbit, bitorder=W_BITORDER
                 )
                 assert actual_signed == _packbits_ref(unpacked, nbit)
 
-    @pytest.mark.skipif(sys.byteorder != "little", reason="not little-endian")
+    @pytest.mark.skipif(sys.byteorder != W_BITORDER, reason=f"not {W_BITORDER}-endian")
     def test_uint8_unpackbits_scalar(self):
         x1 = np.int8(101)  # 01100101
         assert x1 == 0b01100101
@@ -176,8 +145,8 @@ def test_uint8_unpackbits_scalar(self):
 
         assert np.uint8(x2) == 0b11100101
 
-        y1 = np.unpackbits(np.uint8(x1), bitorder="little")
-        y2 = np.unpackbits(np.uint8(x2), bitorder="little")
+        y1 = np.unpackbits(np.uint8(x1), bitorder=W_BITORDER)
+        y2 = np.unpackbits(np.uint8(x2), bitorder=W_BITORDER)
 
         assert np.array_equal(y1, np.array([1, 0, 1, 0, 0, 1, 1, 0], dtype=np.uint8))
         assert np.array_equal(y2, np.array([1, 0, 1, 0, 0, 1, 1, 1], dtype=np.uint8))
@@ -221,23 +190,43 @@ def _weight_pack(w: WeightType, nbit: int, nfold: int) -> WRAMPackedType:
         w_unpacked_aligned = wram_base_shape.T.reshape((-1, N_BIT_PACKED_WEIGHT))
 
         # -> (512*18)*8 uint8
-        w_packed_u8 = np.packbits(w_unpacked_aligned, axis=1, bitorder="little")
+        w_packed_u8 = np.packbits(w_unpacked_aligned, axis=1, bitorder=W_BITORDER)
         assert w_packed_u8.shape[1] == 8
 
         _n_u64 = WRAM_BASE_SHAPE[0] // N_BIT_PACKED_WEIGHT
         # -> (512*18)*1 uint64 -> 512*18 uint64
         w_packed_u64 = w_packed_u8.view(WRAM_PACKED_DTYPE).reshape((-1, _n_u64))
 
+        # -> 512*144 -> 512*18 uint64
+        a = np.packbits(wram_base_shape.T, axis=1, bitorder=W_BITORDER)
+        b = np.ascontiguousarray(a).view(WRAM_PACKED_DTYPE)
+
+        assert np.array_equal(w_packed_u64, b)
+
         return w_packed_u64
 
 
-from paibox.backend.placement import FANOUT_IW8
+class TestWeightRamMapping:
+    @pytest.mark.parametrize("expected_row", [3, 5, 7])
+    def test_nfold_weight(self, expected_row):
+        """A prototype function of `CorePlacement._nfold_weight` to test the weight folding."""
+        original_matrix = np.arange(1, 25, dtype=WEIGHT_DTYPE).reshape(8, 3)
+        nfold = 3
 
-NEURON_PARAMS_BIT_LENGTH = 214
-N_NEURON_PARAM_IN_COL = HwConfig.N_FANIN_PER_DENDRITE_MAX // NEURON_PARAMS_BIT_LENGTH
+        assert nfold <= expected_row
+        result = CorePlacement._nfold_weight(original_matrix, expected_row, nfold)
 
+        expected_folded = np.array(
+            [
+                [1, 10, 19, 2, 11, 20, 3, 12, 21],
+                [4, 13, 22, 5, 14, 23, 6, 15, 24],
+                [7, 16, 0, 8, 17, 0, 9, 18, 0],
+            ],
+            dtype=WEIGHT_DTYPE,
+        )
+        expected = np.pad(expected_folded, ((0, expected_row - nfold), (0, 0)))
 
-class TestWeightRamMapping:
+        assert np.array_equal(result, expected)
 
     @pytest.mark.parametrize(
         "shape, wp, lcn_ex",
@@ -279,7 +268,7 @@ def test_weight_ram_mapping_iw1(
         test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
 
         # 1. Fold, return the folded weight after padding.
-        w_folded = self._fold_raw_weight_single(test_weight, expected_shape[0], nfold)
+        w_folded = CorePlacement._nfold_weight(test_weight, expected_shape[0], nfold)
 
         # 2. Map to the WRAM.
         wram_unpacked = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
@@ -337,7 +326,7 @@ def test_weight_ram_mapping_iw8(
         test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
 
         # 1. Fold, return the folded weight after padding.
-        w_folded = self._fold_raw_weight_single(test_weight, expected_shape[0], nfold)
+        w_folded = CorePlacement._nfold_weight(test_weight, expected_shape[0], nfold)
 
         # 2. Map to the NRAM.
         wram_unpacked = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
@@ -352,11 +341,12 @@ def test_weight_ram_mapping_iw8(
             assert wram_weight.shape[1] + wram_neurons.shape[1] <= WRAM_BASE_SHAPE[1]
             wram_unpacked[:, -wram_neurons.shape[1] :] = wram_neurons
 
-        # TODO how to check
+        # 3. Check
+        self._wram_mapping_check_iw8(test_weight, w_folded, wram_unpacked, nbit, nfold)
 
     @staticmethod
     def _weight_ram_mapping(
-        folded_weights: WeightType, n_bit: int, n_fold: int, iw: Literal[1, 8]
+        w_folded: WeightType, n_bit: int, n_fold: int, iw: Literal[1, 8]
     ) -> WRAMUnpackedType:
         if iw == 1:
             # The length of slot for each bit of input data
@@ -365,30 +355,26 @@ def _weight_ram_mapping(
             # N_FANIN_PER_DENDRITE_SNN // iw
             bit_slot_length = HwConfig.N_FANIN_PER_DENDRITE_ANN
 
-        folded_row, folded_col = folded_weights.shape
+        folded_row, folded_col = w_folded.shape
         n_dendrite_comb = n_bit * n_fold
         # oc * e / (8/w) = oc * d / 8
         orig_col = folded_col // n_fold
         result_col = math.ceil(orig_col * n_dendrite_comb / iw)
         # Units are divided into small blocks of columns, fan-in extension
-        # (oc, lcn, nbit, 144/1152)
+        # (oc, lcn, nbit, 144 or 1152)
         cew_block = np.zeros(
             (orig_col, n_fold, n_bit, bit_slot_length), dtype=WRAM_UNPACKED_DTYPE
         )
         # [N*M] -> [M*N*1]
-        folded_weights_3d = np.expand_dims(folded_weights.T, axis=2).view(
-            WRAM_UNPACKED_DTYPE
-        )
+        w_folded_3d = np.expand_dims(w_folded.T, axis=2).view(WRAM_UNPACKED_DTYPE)
         for c in range(orig_col):
             for lcn in range(n_fold):
-                # Unpack the array [N*1] -> [N*8]
-                # [0, :]-> [folded_row, :]: A[0] -> A[folded_row-1]
-                # [:, 0]->[:,7]: LSB->MSB
+                # Unpack the array [N*1] -> [N*n_bit], LSB->MSB
                 unpacked = np.unpackbits(
-                    folded_weights_3d[c * n_fold + lcn, :, :],
+                    w_folded_3d[c * n_fold + lcn, :, :],
                     axis=1,
                     count=n_bit,
-                    bitorder="little",
+                    bitorder=W_BITORDER,
                 )
 
                 for bit in range(n_bit):
@@ -479,7 +465,7 @@ def test_weight_ram_mapping_neurons_limit(
         test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
 
         # 1. Fold, return the folded weight after padding.
-        w_folded = self._fold_raw_weight_single(test_weight, expected_shape[0], nfold)
+        w_folded = CorePlacement._nfold_weight(test_weight, expected_shape[0], nfold)
 
         # 2. Map to the NRAM.
         with expectation:
@@ -513,13 +499,13 @@ def _weight_ram_mapping_iw8(
         for c in range(orig_col):
             for lcn in range(n_fold):
                 # For every m in M, unpack the array [N*1] -> [N*8]
-                # [0, :]-> [row, :]: A[0] -> A[row-1]
-                # [:, 0]->[:,7]: LSB->MSB
+                # [0,:] -> [row,:]: A[0] -> A[row-1]
+                # [:,0] -> [:,7]: LSB->MSB
                 unpacked = np.unpackbits(
                     folded_weights_3d[c * n_fold + lcn, :, :],
                     axis=1,
                     count=n_bit,
-                    bitorder="little",
+                    bitorder=W_BITORDER,
                 )
 
                 for bit in range(n_bit):
@@ -596,27 +582,6 @@ def _weight_ram_mapping_iw8(
 
         return result
 
-    @staticmethod
-    def _fold_raw_weight_single(raw_weight: WeightType, expected_row: int, nfold: int):
-        raw_row, raw_col = raw_weight.shape
-
-        if (r := raw_row % nfold) > 0:
-            _padding = nfold - r
-            assert expected_row * nfold == raw_row + _padding
-
-            w_padding = np.pad(raw_weight, ((0, _padding), (0, 0)))
-        else:
-            w_padding = raw_weight
-
-        split = np.vsplit(w_padding, nfold)
-        w_folded = np.zeros((expected_row, raw_col * nfold), dtype=WEIGHT_DTYPE)
-
-        for i, j in np.ndindex((nfold, raw_col)):
-            w_col = split[i][:, j]
-            w_folded[:, j * nfold + i] = w_col
-
-        return w_folded
-
     # at commit 67054d8
     @staticmethod
     def _weight_ram_mapping_iw1_old(folded_weights: np.ndarray, n_bit: int):
@@ -630,7 +595,7 @@ def _weight_ram_mapping_iw1_old(folded_weights: np.ndarray, n_bit: int):
         for i in range(col):
             # For every m in M, unpack the array [N*1] -> [N*8]
             unpacked = np.unpackbits(
-                folded_weights_3d[i], axis=1, count=n_bit, bitorder="little"
+                folded_weights_3d[i], axis=1, count=n_bit, bitorder=W_BITORDER
             )
 
             result[:, n_bit * i : n_bit * (i + 1)] = unpacked
@@ -648,14 +613,14 @@ def _wram_mapping_check_iw1(
         nbit: int,
         nfold: int,
     ) -> None:
+        n_in_col = w_folded.shape[0]
         for i, j in np.ndindex(test_data.shape):
-            n_in_col = w_folded.shape[0]
             offset_j, now_i = divmod(i, n_in_col)
             now_j = offset_j + j * nfold
 
             wij = w_unpacked[now_i, now_j * nbit : (now_j + 1) * nbit]
-
             wij_packed = _packbits_ref(wij, nbit)
+
             assert test_data[i, j] == wij_packed
 
     @staticmethod
@@ -666,7 +631,22 @@ def _wram_mapping_check_iw8(
         nbit: int,
         nfold: int,
     ) -> None:
-        pass
+        n_in_col = w_folded.shape[0]
+        n_lcn_in_col = 8 // nbit  # The amount of lcn in one column
+
+        for i, j in np.ndindex(test_data.shape):
+            # Get the coordinate (i_folded, j_folded) in the folded weight
+            offset_j, i_folded = divmod(i, n_in_col)
+            j_folded = offset_j + j * nfold
+            # Get the index of E-block
+            e_j, e_i = divmod(j_folded, n_lcn_in_col)
+            # Just get `nbit` bits
+            wij = w_unpacked[i_folded :: HwConfig.N_FANIN_PER_DENDRITE_ANN, e_j][
+                e_i * nbit : (e_i + 1) * nbit
+            ]
+            wij_packed = _packbits_ref(wij, nbit)
+
+            assert test_data[i, j] == wij_packed
 
     @pytest.mark.parametrize(
         "shape, wp, lcn_ex",
@@ -717,7 +697,7 @@ def _gen_wram_for_neurons(n_extra_neurons: int, wp, lcn_ex):
             params = frame3.packages[i * 4 : (i + 1) * 4]
             # [0:NEURON_PARAMS_BIT_LENGTH]:LSB to MSB + [NEURON_PARAMS_BIT_LENGTH:]:0
             neuron_params_214b[i, :] = np.unpackbits(
-                params.view(WRAM_UNPACKED_DTYPE), axis=0, bitorder="little"
+                params.view(WRAM_UNPACKED_DTYPE), axis=0, bitorder=W_BITORDER
             )[:NEURON_PARAMS_BIT_LENGTH]
 
         # Slow method
@@ -736,7 +716,7 @@ def _gen_wram_for_neurons(n_extra_neurons: int, wp, lcn_ex):
                 * NEURON_PARAMS_BIT_LENGTH,
                 idx_col,
             ] = neuron_params_214b[i, :].squeeze()
-        # Slow method ends.
+        # Slow method ends
 
         # Pad the row of neuron parameters to a multiple of `N_NEURON_PARAM_IN_COL`
         if (r := neuron_params_214b.shape[0] % N_NEURON_PARAM_IN_COL) > 0:
@@ -764,7 +744,7 @@ def test_weight_ram_mapping_8bits(self):
 
         array = np.random.randint(-128, 128, size=(4, 4), dtype=WEIGHT_DTYPE)
 
-        y = np.unpackbits(np.uint8(array), axis=1, bitorder="little")
+        y = np.unpackbits(np.uint8(array), axis=1, bitorder=W_BITORDER)
         assert y.shape == (4, (1 << wp) * 4)
 
         binary_conn[: y.shape[0], : y.shape[1]] = y
@@ -785,7 +765,7 @@ def test_weight_ram_mapping_4bits(self):
 
         for i in range(4):
             ual = np.uint8(np.expand_dims(array[:, i], axis=1))
-            a = np.unpackbits(ual, axis=1, count=4, bitorder="little")
+            a = np.unpackbits(ual, axis=1, count=4, bitorder=W_BITORDER)
             y[: a.shape[0], (1 << wp) * i : (1 << wp) * (i + 1)] = a
 
         assert y.shape == (4, (1 << wp) * 4)
@@ -808,7 +788,7 @@ def test_weight_ram_mapping_2bits(self):
 
         for i in range(4):
             ual = np.uint8(np.expand_dims(array[:, i], axis=1))
-            a = np.unpackbits(ual, axis=1, count=2, bitorder="little")
+            a = np.unpackbits(ual, axis=1, count=2, bitorder=W_BITORDER)
             y[: a.shape[0], (1 << wp) * i : (1 << wp) * (i + 1)] = a
 
         assert y.shape == (4, (1 << wp) * 4)

From e8e31fdb6c3166210a4e27970b3f4676c4293f40 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 5 Nov 2024 10:54:12 +0800
Subject: [PATCH 121/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix(wram):=20fixed=20?=
 =?UTF-8?q?incorrect=20mapping=20of=20neuron=20parameters=20on=20WRAM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/conf_exporting.py | 17 ++++----
 paibox/backend/placement.py      | 75 ++++++++++++++++++++++----------
 paibox/backend/types.py          |  2 +-
 3 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/paibox/backend/conf_exporting.py b/paibox/backend/conf_exporting.py
index 7563eeef..e9584515 100644
--- a/paibox/backend/conf_exporting.py
+++ b/paibox/backend/conf_exporting.py
@@ -103,10 +103,6 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                             neu_conf.neuron_attrs,
                             neu_conf.neuron_dest_info,
                             neu_conf.neu_seg.repeat,
-                            # v.params_reg.n_repeat_nram,
-                            # XXX Is the parameter 'repeat' passed in from the previous step, or
-                            # is it calculated automatically in the parametric model?
-                            # Need to check this parameter?
                         )
                     )
                 else:
@@ -158,26 +154,29 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                     core_coord,
                     _RID_UNSET,
                     0,
-                    18 * v.weight_ram.shape[0],
+                    v.weight_ram.size,
                     v.weight_ram,
                 )
 
                 _concat_frames.append(config_frame_type4_w.value)
 
+            # Extra neurons part
             if neu_conf_on_wram:
+                # Only the part that is mapped to the neuron parameters is returned.
                 neu_on_wram = CorePlacement.neu_params_mapping(neu_conf_on_wram)
-                # Extra neurons part
                 assert (
                     v.weight_ram.shape[0] + neu_on_wram.shape[0]
-                    <= HwConfig.ADDR_RAM_MAX + 1
+                    <= CorePlacement.WRAM_BASE_SHAPE[1]
                 )
+
                 config_frame_type4_n = OfflineFrameGen.gen_config_frame4(
                     chip_coord,
                     core_coord,
                     _RID_UNSET,
-                    # Start after the weights mapped to the WRAM
+                    # `v.weight_ram` already contains the mapped & unallocated parts for weight mapping,
+                    # so `neu_on_wram` can be placed next to it.
                     v.weight_ram.shape[0],
-                    18 * neu_on_wram.shape[0],
+                    neu_on_wram.size,
                     neu_on_wram,
                 )
 
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 08da348e..c498ed81 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -451,6 +451,10 @@ class CorePlacement(CoreAbstract):
         HwConfig.ADDR_AXON_MAX + 1,
         HwConfig.ADDR_RAM_MAX + 1,
     )
+    """The base shape of weight RAM."""
+
+    N_U64_ON_WRAM_ADDR: ClassVar[int] = WRAM_BASE_SHAPE[0] // N_BIT_PACKED_WEIGHT
+    """The number of u64 at each address of weight RAM."""
 
     def __init__(
         self,
@@ -530,27 +534,32 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
             This function was tested using only the prototype functions. For test items, please refer to                \
             tests/backend/test_placement.py::TestWeightRamMapping for details.
 
-        Return:
-            The packed matrix of weights mapped to the WRAM, with shape (x, 18) (x <= 512).
+        Returns:
+            The packed matrix of weights mapped to the WRAM, with shape (x, N_U64_ON_WRAM_ADDR) uint64 (x <= 512). The  \
+            entire WRAM contains up to 4 parts: the mapped & unallocated part for weights & neuron parameters.          \
+            For example,
+
+            W1 = W[:x1  ,:]: the mapped part for weights.
+            W2 = W[x1:x2,:]: the unallocated part for weights(0).
+            W3 = W[x2:x3,:]: the mapped part for neurons parameters.
+            W4 = W[x3:  ,:]: the unallocated part for neurons parameters(0). Since it is at the end of WRAM, we don't   \
+                care about it.
+
+            0 < x1 < x2 < x3 <= 512.
+
+            This function only processes the weight part, that is, returns W1+W2 = W[:x2,:].
         """
         w_folded = self._fold_raw_weights(self.raw_weights)
         folded_row, _ = w_folded.shape
-        # The 1152*512 unpacked weight, uint8 but only 0 & 1.
-        # wram_unpacked = np.zeros(self.WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
-
-        if is_iw8(self.rt_mode):
-            # The length of slot for each bit of input data
-            iw, bit_slot_length = 8, HwConfig.N_FANIN_PER_DENDRITE_ANN
-        else:
-            iw, bit_slot_length = 1, HwConfig.N_FANIN_PER_DENDRITE_SNN
 
+        iw = 8 if is_iw8(self.rt_mode) else 1
         n_dendrite_comb = 1 << self.dendrite_comb_rate
         # oc * e / (8/w) = oc * d / 8
         orig_col = self.n_neuron
         result_col = math.ceil(orig_col * n_dendrite_comb / iw)
         # Units are divided into small blocks of columns, fan-in extension
         cew_block = np.zeros(
-            (orig_col, self.n_timeslot, self.n_weight_bits, bit_slot_length),
+            (orig_col, self.n_timeslot, self.n_weight_bits, self.parent.n_fanin_base),
             dtype=WRAM_UNPACKED_DTYPE,
         )
 
@@ -585,12 +594,26 @@ def _weight_ram_mapping(self) -> WRAMPackedType:
                 (cew_block.shape[0] // n_col_comb_in_col, -1)
             ).T
 
-        # For 8-bit input width, here is only the weight mapped to the WRAM. Extra neurons
-        # paramaters will be mapped to the WRAM when exporting the configuration frames.
-        # wram_unpacked[:, : w_mapped.shape[1]] = w_mapped
+        wram_packed = self._weight_pack(w_mapped)
 
-        # `w_mapped` is only the weight mapped to the WRAM. The shape[1] of `w_mapped` <= 512.
-        return self._weight_pack(w_mapped)
+        # Available columns for weight mapping to the WRAM.
+        if iw == 1:
+            n_col_weight_on_wram = CorePlacement.WRAM_BASE_SHAPE[1]
+        else:
+            n_144b_dendrites = (
+                FANOUT_IW8[self.dendrite_comb_rate] << self.dendrite_comb_rate
+            )
+            n_col_weight_on_wram = n_144b_dendrites // iw
+
+        # The mapped & unallocated part for weights, W1+W2
+        wram_weight_packed = np.zeros(
+            (n_col_weight_on_wram, CorePlacement.N_U64_ON_WRAM_ADDR),
+            dtype=WRAM_PACKED_DTYPE,
+        )
+        wram_weight_packed[: wram_packed.shape[0], :] = wram_packed
+        wram_weight_packed.setflags(write=False)
+
+        return wram_weight_packed
 
     @staticmethod
     def _nfold_weight(
@@ -623,11 +646,14 @@ def _nfold_weight(
 
     @staticmethod
     def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
-        """Convert the unpacked weights into a mapping form, corresponding to the WRAM address. Each address contains \
-            18 uint64.
+        """Convert the unpacked weights into a mapping form, corresponding to the WRAM address. Each address contains   \
+            uint64.
             (1152, x) -> (x, 1152) -> (x*18, 64) -> (x*18, 8) uint8 -> (x*18, 1) uint64 -> (x, 18) uint64.
-            
-            TODO simpler (1152, x) -> (x, 1152) -> pack -> (x, 144) uint8 -> (x, 18) uint64. (x <= 512)
+
+            TODO simpler (1152, x) -> (x, 1152) -> pack -> (x, 144) uint8 -> (x, 18) uint64.
+
+        Returns:
+            The packed matrix of weights with shape (x, 18) where x <= 512.
         """
         # Reshape to 64 columns to avoid contiguous problem.
         w_unpacked_aligned = w_unpacked.T.reshape((-1, N_BIT_PACKED_WEIGHT))
@@ -646,6 +672,8 @@ def _weight_pack(w_unpacked: WRAMUnpackedType) -> WRAMPackedType:
         # w_packed_u64 = np.ascontiguousarray(w_packed_u8).view(WRAM_PACKED_DTYPE)
         w_packed_u64.setflags(write=False)
 
+        # TODO If the assertion is useless, remove it.
+        assert w_packed_u64.shape[1] == CorePlacement.N_U64_ON_WRAM_ADDR
         return w_packed_u64
 
     @staticmethod
@@ -655,8 +683,9 @@ def neu_params_mapping(neu_confs: list[NeuronConfig]) -> WRAMPackedType:
         NOTE: This function was tested using only the prototype functions. For test items, please refer to              \
             `tests/backend/test_placement.py::TestWeightRamMapping` for details.
 
-        Return:
-            The packed matrix of extra neurons parameters mapped to the WRAM, with shape (x, 18) (x <= 512).
+        Returns:
+            The packed matrix W3 with shape (L, 18) where L is the used columns for mapping neurons parameters. See     \
+            details in function `_weight_ram_mapping`.
         """
         neu_conf_params_list: list[WRAMUnpackedType] = []
 
@@ -706,7 +735,7 @@ def neu_params_mapping(neu_confs: list[NeuronConfig]) -> WRAMPackedType:
             dtype=WRAM_UNPACKED_DTYPE,
         )
         _n_bit_nparams = NEURON_PARAMS_BIT_LENGTH * N_NEURON_PARAM_IN_COL
-        result[:_n_bit_nparams] = neu_params.T
+        result[:_n_bit_nparams, :] = neu_params.T
 
         # (1152, y) -> (y, 18)
         return CorePlacement._weight_pack(result)
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index ff7984ed..ee76e0b1 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -50,7 +50,7 @@
 DestNodeType: TypeAlias = Neuron
 
 WRAM_UNPACKED_DTYPE = np.uint8
-WRAM_PACKED_DTYPE = np.uint64
+WRAM_PACKED_DTYPE = np.uint64  # Type of one frame of data package
 # Type of unpacked weight in WRAM
 WRAMUnpackedType: TypeAlias = NDArray[WRAM_UNPACKED_DTYPE]
 # Type of packed weight in WRAM

From 5360ce3b7583c8568769144aaf62899067795169 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 5 Nov 2024 10:55:48 +0800
Subject: [PATCH 122/187] =?UTF-8?q?=E2=9C=85=20test(wram):=20improved=20th?=
 =?UTF-8?q?e=20test=20cases=20for=20mapping=20neuron=20parameters=20to=20W?=
 =?UTF-8?q?RAM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_placement.py | 96 +++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 28 deletions(-)

diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index ea8aedbb..5417b59d 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -152,38 +152,44 @@ def test_uint8_unpackbits_scalar(self):
         assert np.array_equal(y2, np.array([1, 0, 1, 0, 0, 1, 1, 1], dtype=np.uint8))
 
     @pytest.mark.parametrize(
-        "shape, wp, lcn_ex",
+        "shape, wp, lcn_ex, iw",
         [
-            ((120, 800), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X),
-            ((16, 16), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_2X),
-            ((80, 48), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_16X),
-            ((100, 510), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_1X),
-            ((99, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_2X),
-            ((100, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_8X),
+            ((600, 100), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X, 1),
+            ((1000, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_2X, 1),
+            ((120, 800), WW.WEIGHT_WIDTH_1BIT, LCN_EX.LCN_4X, 8),
+            ((16, 16), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_2X, 8),
+            ((80, 48), WW.WEIGHT_WIDTH_4BIT, LCN_EX.LCN_16X, 8),
+            ((100, 510), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_1X, 8),
+            ((99, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_2X, 8),
+            ((100, 32), WW.WEIGHT_WIDTH_8BIT, LCN_EX.LCN_8X, 8),
         ],
     )
     def test_unpacked_weight_pack(
-        self, shape, wp, lcn_ex, fixed_rng: np.random.Generator
+        self, shape, wp, lcn_ex, iw, fixed_rng: np.random.Generator
     ):
-        assert shape[1] <= _get_max_fanout(8, wp + lcn_ex)
+        assert shape[1] <= _get_max_fanout(iw, wp + lcn_ex)
 
         nbit = 1 << wp
         nfold = 1 << lcn_ex
         _low, _high = _nbit_limit(nbit)
         # Generate the unpacked weight, folded
         test_weight = fixed_rng.integers(_low, _high, size=shape, dtype=WEIGHT_DTYPE)
-        w_packed_u64 = self._weight_pack(test_weight, nbit, nfold)
+        w_packed_u64 = self._weight_pack(test_weight, nbit, nfold, iw)
 
         assert w_packed_u64.shape[0] == WRAM_BASE_SHAPE[1]
 
     @staticmethod
-    def _weight_pack(w: WeightType, nbit: int, nfold: int) -> WRAMPackedType:
+    def _weight_pack(
+        w: WeightType, nbit: int, nfold: int, iw: Literal[1, 8]
+    ) -> WRAMPackedType:
         """This prototype function is used to pack the unpacked uint8 weight of size `WRAM_BASE_SHAPE` into \
-            a packed uint64 weight of size (WRAM_BASE_SHAPE[1], WRAM_BASE_SHAPE[0]//64)."""
+            a packed uint64 weight of size (WRAM_BASE_SHAPE[1], WRAM_BASE_SHAPE[0]//64).
+        """
         wram_base_shape = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
 
         # -> 1152*512 uint8
-        wram_unpacked = TestWeightRamMapping._weight_ram_mapping(w, nbit, nfold, 8)
+        wram_unpacked = TestWeightRamMapping._weight_ram_mapping(w, nbit, nfold, iw)
+
         wram_base_shape[:, : wram_unpacked.shape[1]] = wram_unpacked
 
         # -> 512*1152 -> (512*18)*64
@@ -201,7 +207,14 @@ def _weight_pack(w: WeightType, nbit: int, nfold: int) -> WRAMPackedType:
         a = np.packbits(wram_base_shape.T, axis=1, bitorder=W_BITORDER)
         b = np.ascontiguousarray(a).view(WRAM_PACKED_DTYPE)
 
+        # Use the method in the `CorePlacement`, return the weight part only.
+        # TODO If everything is OK, just keep this method.
+        wram_packed_u64 = CorePlacement._weight_pack(wram_unpacked)
+
         assert np.array_equal(w_packed_u64, b)
+        assert np.array_equal(
+            wram_packed_u64, w_packed_u64[: wram_packed_u64.shape[0], :]
+        )
 
         return w_packed_u64
 
@@ -329,20 +342,46 @@ def test_weight_ram_mapping_iw8(
         w_folded = CorePlacement._nfold_weight(test_weight, expected_shape[0], nfold)
 
         # 2. Map to the NRAM.
-        wram_unpacked = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
-        wram_weight = self._weight_ram_mapping(w_folded, nbit, nfold, iw)
-        wram_unpacked[:, : wram_weight.shape[1]] = wram_weight
+        # (1152, 512)
+        wram_unpacked_total = np.zeros(WRAM_BASE_SHAPE, dtype=WRAM_UNPACKED_DTYPE)
+        wram_weight_unpacked = self._weight_ram_mapping(w_folded, nbit, nfold, iw)
+        wram_unpacked_total[:, : wram_weight_unpacked.shape[1]] = wram_weight_unpacked
 
+        n_col_used_total = wram_weight_unpacked.shape[1]
+        wram_nparams_unpacked = None
         # NOTE: While mapping extra neuron parameters to the WRAM occurs
         # during the configuration frame export phase, it is tested here.
         if (n_extra_neurons := shape[1] - WRAM_BASE_SHAPE[1]) > 0:
-            wram_neurons = self._gen_wram_for_neurons(n_extra_neurons, wp, lcn_ex)
+            wram_nparams_unpacked = self._gen_wram_for_neurons(
+                n_extra_neurons, wp, lcn_ex
+            )
 
-            assert wram_weight.shape[1] + wram_neurons.shape[1] <= WRAM_BASE_SHAPE[1]
-            wram_unpacked[:, -wram_neurons.shape[1] :] = wram_neurons
+            n_col_used_total += wram_nparams_unpacked.shape[1]
+            wram_unpacked_total[:, wram_weight_unpacked.shape[1] : n_col_used_total] = (
+                wram_nparams_unpacked
+            )
+
+            # Cross-check: the used columns of wram_unpacked_total must equal the `np.hstack` of both parts.
+            wram_unpacked_total2 = np.hstack(
+                [wram_weight_unpacked, wram_nparams_unpacked]
+            )
+            assert np.array_equal(
+                wram_unpacked_total[:, :n_col_used_total], wram_unpacked_total2
+            )
 
         # 3. Check
-        self._wram_mapping_check_iw8(test_weight, w_folded, wram_unpacked, nbit, nfold)
+        assert n_col_used_total <= WRAM_BASE_SHAPE[1]
+        self._wram_mapping_check_iw8(
+            test_weight, w_folded, wram_unpacked_total, nbit, nfold
+        )
+
+        # 4. Pack
+        wram_weight_packed = CorePlacement._weight_pack(wram_weight_unpacked)
+        assert wram_weight_packed.shape[1] <= WRAM_BASE_SHAPE[1]
+
+        if wram_nparams_unpacked is not None:
+            wram_nparams_packed = CorePlacement._weight_pack(wram_nparams_unpacked)
+            assert wram_nparams_packed.shape[1] <= WRAM_BASE_SHAPE[1]
 
     @staticmethod
     def _weight_ram_mapping(
@@ -667,10 +706,12 @@ def test_weight_ram_mapping_for_neurons(self, shape, wp, lcn_ex):
         wram_neurons = self._gen_wram_for_neurons(n_extra_neurons, wp, lcn_ex)
 
     @staticmethod
-    def _gen_wram_for_neurons(n_extra_neurons: int, wp, lcn_ex):
+    def _gen_wram_for_neurons(
+        n_extra_neurons: int, wp: WW, lcn_ex: LCN_EX
+    ) -> WRAMUnpackedType:
         """A prototype function for mapping extra neurons parameters on the WRAM for 8-bit input width.
 
-        NOTE: The shape of final result` is (1152(WRAM_BASE_SHAPE[0]), x), where x <= 512 (WRAM_BASE_SHAPE[1]).
+        NOTE: The shape of final result is (1152(WRAM_BASE_SHAPE[0]), x), where x <= 512 (WRAM_BASE_SHAPE[1]).
         """
         extra_neurons = pb.ANNNeuron(n_extra_neurons, bit_trunc=15)
         # extra_neurons = pb.ANNBypassNeuron(n_extra_neurons)
@@ -700,11 +741,12 @@ def _gen_wram_for_neurons(n_extra_neurons: int, wp, lcn_ex):
                 params.view(WRAM_UNPACKED_DTYPE), axis=0, bitorder=W_BITORDER
             )[:NEURON_PARAMS_BIT_LENGTH]
 
-        # Slow method
         n_col_avail = math.ceil(
             (_get_max_fanout(8, wp + lcn_ex) - WRAM_BASE_SHAPE[1])
             / N_NEURON_PARAM_IN_COL
         )
+        # Slow method
+        # TODO Remove it if everything is all right
         wram_neurons_slow = np.zeros(
             (WRAM_BASE_SHAPE[0], n_col_avail), dtype=WRAM_UNPACKED_DTYPE
         )
@@ -729,12 +771,10 @@ def _gen_wram_for_neurons(n_extra_neurons: int, wp, lcn_ex):
         neuron_params_214b = neuron_params_214b.reshape((-1, n_bit_nparams))
         _n_col_occupied = neuron_params_214b.shape[0]
 
-        result = np.zeros(
-            (WRAM_BASE_SHAPE[0], _n_col_occupied), dtype=WRAM_UNPACKED_DTYPE
-        )
-        result[:n_bit_nparams] = neuron_params_214b.T
+        result = np.zeros((WRAM_BASE_SHAPE[0], n_col_avail), dtype=WRAM_UNPACKED_DTYPE)
+        result[:n_bit_nparams, :_n_col_occupied] = neuron_params_214b.T
 
-        assert np.array_equal(result, wram_neurons_slow[:, :_n_col_occupied])
+        assert np.array_equal(result, wram_neurons_slow)
 
         return result
 

From 389e6a32d665c8e7ce89fb00fe6dd2b7eacfd3de Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 5 Nov 2024 11:02:43 +0800
Subject: [PATCH 123/187] =?UTF-8?q?=F0=9F=8E=A8=20typing:=20improved=20typ?=
 =?UTF-8?q?ing=20for=20`DynSysGroup`=20&=20some?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py   | 19 ++++++++++-------
 paibox/components/functional.py | 37 ++++++++++++++++-----------------
 paibox/network.py               | 17 +++++++--------
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index daacfaba..6c5e9aea 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,12 +1,12 @@
 import math
 from typing import Literal, Optional, Protocol, Union
+import typing
 
 import numpy as np
 from paicorelib import TM, HwConfig
 
 from paibox.base import NeuDyn, NodeList
-from paibox.exceptions import ResourceError
-from paibox.network import DynSysGroup
+from paibox.exceptions import ResourceError, ShapeError
 from paibox.types import (
     LEAK_V_DTYPE,
     NEUOUT_U8_DTYPE,
@@ -45,6 +45,9 @@
     _Pool2dForward,
 )
 
+if typing.TYPE_CHECKING:
+    from paibox.network import DynSysGroup
+
 __all__ = [
     "_DelayChainANN",
     "_DelayChainSNN",
@@ -94,7 +97,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return x1
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n_delaychain = NodeList()
         s_delaychain = NodeList()
 
@@ -161,7 +164,7 @@ class _HasSemiFoldedIntf(Protocol):
 
     def build(
         self,
-        network: DynSysGroup,
+        network: "DynSysGroup",
         valid_interval: int,
         ts_first_valid_inp: int,
         **build_options,
@@ -276,7 +279,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return self.tfm(x1)
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         if self.tfm.pool_type == "avg":
             n1_p1d = Neuron(
                 self.shape_out,
@@ -366,7 +369,7 @@ def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageTyp
     def synaptic_integr(self, x1: NeuOutType, vjt_pre: VoltageType) -> VoltageType:
         return vjt_overflow(vjt_pre + self.tfm(x1).ravel())
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_p1d = IF(
             self.shape_out,
             threshold=self.pos_thres,
@@ -446,7 +449,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return self.tfm(x1)
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         if self.tfm.pool_type == "avg":
             n1_p2d = Neuron(
                 self.shape_out,
@@ -540,7 +543,7 @@ def spike_func(self, vjt: VoltageType, **kwargs) -> tuple[NeuOutType, VoltageTyp
     def synaptic_integr(self, x1: NeuOutType, vjt_pre: VoltageType) -> VoltageType:
         return vjt_overflow(vjt_pre + self.tfm(x1).ravel())
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_p2d = IF(
             self.shape_out,
             threshold=self.pos_thres,
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 00bdcf66..8d026ce6 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -2,13 +2,13 @@
 from collections.abc import Sequence
 from functools import partial
 from typing import ClassVar, Optional, Union
+import typing
 
 import numpy as np
 from paicorelib import NTM, RM, TM
 
 from paibox.base import NeuDyn, NodeList
-from paibox.exceptions import PAIBoxDeprecationWarning, ResourceError, ShapeError
-from paibox.network import DynSysGroup
+from paibox.exceptions import PAIBoxDeprecationWarning, ShapeError
 from paibox.types import (
     LEAK_V_DTYPE,
     NEUOUT_U8_DTYPE,
@@ -46,6 +46,9 @@
 else:
     from typing_extensions import deprecated
 
+if typing.TYPE_CHECKING:
+    from paibox.network import DynSysGroup
+
 __all__ = [
     "BitwiseAND",
     "BitwiseNOT",
@@ -105,7 +108,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 & x2
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_and = LIF(
             self.shape_out,
             threshold=1,
@@ -175,7 +178,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         return x1 == 0  # x1 is an array in uint8
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_not = LIF(
             self.shape_out,
             threshold=1,
@@ -229,7 +232,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 | x2
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_or = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
@@ -288,7 +291,7 @@ def __init__(
     def spike_func(self, x1: NeuOutType, x2: NeuOutType, **kwargs) -> NeuOutType:
         return x1 ^ x2
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         # If neuron_a is of shape (h1, w1) = N, and neuron_b is of shape (h2, w2) = N.
         # The output shape of the module is (N,) or (h1, w1)(if h1 == h2).
         # The shape of n1 is (2N,) or (2, h1, w1).
@@ -398,7 +401,7 @@ def synaptic_integr(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
         )
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_sadd = IF(
             self.shape_out,
             self.pos_threshold,
@@ -694,7 +697,7 @@ def synaptic_integr(
             x1, x2, self.factor_a, self.factor_b, vjt_pre, strict=self.overflow_strict
         )
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_ssub = Neuron(
             self.shape_out,
             reset_mode=RM.MODE_LINEAR,
@@ -766,7 +769,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
 
         return _x1.T
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_t2d = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
@@ -833,7 +836,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
 
         return _x1.transpose(self.axes)
 
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n1_t3d = BypassNeuron(
             self.shape_out,
             delay=self.delay_relative,
@@ -865,16 +868,12 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
         raise NotImplementedError
 
     def build(
-        self, network: DynSysGroup, valid_interval: int, **build_options
+        self, network: "DynSysGroup", valid_interval: int, **build_options
     ) -> BuiltComponentType:
         assert len(self.module_intf.operands[0].shape_out) == 2
         self.valid_interval = valid_interval
 
         in_ch, in_h = self.module_intf.operands[0].shape_out
-        if in_ch * in_h * in_h * valid_interval > 18432:
-            raise ResourceError(
-                f"The {self.name} input size is too large. Please adjust the input size or the number of channels."
-            )
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
@@ -915,7 +914,7 @@ def build(
                 neuron,
                 n_fc,
                 weights=w,
-                conn_type=self.conn_type,
+                conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
             s_weight.append(syn2)
@@ -983,7 +982,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
 
     def build(
         self,
-        network: DynSysGroup,
+        network: "DynSysGroup",
         valid_interval: int,
         ts_first_valid_inp: int,
         **build_options,
@@ -1253,7 +1252,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
 
     def build(
         self,
-        network: DynSysGroup,
+        network: "DynSysGroup",
         valid_interval: int,
         ts_first_valid_inp: int,
         **build_options,
@@ -1373,7 +1372,7 @@ def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
 
     def build(
         self,
-        network: DynSysGroup,
+        network: "DynSysGroup",
         valid_interval: int,
         ts_first_valid_inp: int,
         **build_options,
diff --git a/paibox/network.py b/paibox/network.py
index b657105b..a6057ade 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -7,6 +7,12 @@
 from .collector import Collector
 from .components import NeuModule, Neuron, Projection
 from .components.modules import BuiltComponentType
+from .components.functional import (
+    AvgPool2dSemiFolded,
+    Conv2dSemiFolded,
+    LinearSemiFolded,
+    MaxPool2dSemiFolded,
+)
 from .mixin import Container
 from .node import NodeDict, NodeList
 
@@ -79,13 +85,6 @@ def __call__(self, **kwargs) -> None:
     def build_fmodule(
         cls, network: "DynSysGroup", **build_options
     ) -> dict[NeuModule, BuiltComponentType]:
-        from .components.functional import (
-            AvgPool2dSemiFolded,
-            Conv2dSemiFolded,
-            LinearSemiFolded,
-            MaxPool2dSemiFolded,
-        )
-
         generated = dict()
         modules = network.nodes().subset(NeuModule).unique()
 
@@ -133,8 +132,8 @@ def _remove_components(self, *components: DynamicSys) -> None:
         for cpn in components:
             for tag, obj in self.__dict__.items():
                 if cpn is obj:
-                    cpn.__gh_build_ignore__ = False
-                    delattr(self, tag)
+                    # cpn.__gh_build_ignore__ = False
+                    delattr(self, tag)  # remove the cpn from the network
                     break
 
     def _ignore_components(self, *components: DynamicSys) -> None:

From 2ec15d999176f902767b416d763d83872a1b685a Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 5 Nov 2024 11:12:01 +0800
Subject: [PATCH 124/187] =?UTF-8?q?=F0=9F=8E=A8=20chore:=20improved=20impo?=
 =?UTF-8?q?rt,=20add=20comments?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/__init__.py                  |  21 ++-
 paibox/components/_modules.py       |  26 ++--
 paibox/components/functional.py     | 226 ++++++++++++----------------
 paibox/components/neuron/neurons.py |   5 +-
 paibox/exceptions.py                |   2 +-
 tests/shared_networks.py            |   2 -
 6 files changed, 129 insertions(+), 153 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index df0bfe40..50a890d7 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -5,16 +5,17 @@
 from .backend import Mapper as Mapper
 
 # Functional modules in ANN mode only
-# Functional modules
 from .components.functional import AvgPool2dSemiFolded as AvgPool2dSemiFolded
-from .components.functional import BitwiseAND as BitwiseAND
-from .components.functional import BitwiseNOT as BitwiseNOT
-from .components.functional import BitwiseOR as BitwiseOR
-from .components.functional import BitwiseXOR as BitwiseXOR
 from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
 from .components.functional import Linear as Linear
 from .components.functional import LinearSemiFolded as LinearSemiFolded
 from .components.functional import MaxPool2dSemiFolded as MaxPool2dSemiFolded
+
+# Functional modules in SNN mode only
+from .components.functional import BitwiseAND as BitwiseAND
+from .components.functional import BitwiseNOT as BitwiseNOT
+from .components.functional import BitwiseOR as BitwiseOR
+from .components.functional import BitwiseXOR as BitwiseXOR
 from .components.functional import SpikingAdd as SpikingAdd
 from .components.functional import SpikingAvgPool1d as SpikingAvgPool1d
 from .components.functional import SpikingAvgPool1dWithV as SpikingAvgPool1dWithV
@@ -29,18 +30,22 @@
 # Reduced neurons
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
-from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
-from .components.neuron.neurons import ANNNeuron as ANNNeuron
 from .components.neuron.neurons import BypassNeuron as BypassNeuron
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
 from .components.neuron.neurons import SpikingRelu as SpikingRelu
 from .components.neuron.neurons import TonicSpiking as TonicSpiking
 
+# Reduced neurons in ANN mode only
+from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
+from .components.neuron.neurons import ANNNeuron as ANNNeuron
+
 # Input projection
 from .components.projection import InputProj as InputProj
 
-# Synapses
+# Connection types of synapses
 from .components.synapses import ConnType as SynConnType
+
+# Synapses
 from .components.synapses.synapses import Conv1d as Conv1d
 from .components.synapses.synapses import Conv2d as Conv2d
 from .components.synapses.synapses import ConvTranspose1d as ConvTranspose1d
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 6c5e9aea..e0526c27 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -174,9 +174,9 @@ def build(
 @set_rt_mode_ann()
 class _SemiFoldedModule(FunctionalModule, _HasSemiFoldedIntf):
     valid_interval: int = 1
-    """The interval of valid output data"""
+    """The interval of valid output data."""
     ts_1st_valid_out: int = 0
-    """The timestamp of the first valid output data"""
+    """The timestamp of the first valid output data."""
 
     def _input_buffer_len_check(
         self, in_channels: int, in_h: int, kw: int, valid_interval: int
@@ -206,28 +206,36 @@ def __init__(
         bias: DataType = 0,
         bit_trunc: int = 8,
         *,
-        conn_type: ConnType = ConnType.All2All,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
+        """Basic linear layer for ANN mode.
+
+        Args:
+            neuron_s: the input neuron.
+            out_features: the output shape.
+            weights: the weight matrix.
+            bias: It can be a scalar or an array of the same size as the output.
+            bit_trunc: the bit truncation position. By default, bits 7 to 0 are truncated.
+        """
         self.weights = weights
-        self.conn_type = conn_type
         self.bit_trunc = bit_trunc
+        _shape_out = as_shape(out_features)
 
         if isinstance(bias, np.ndarray):
             _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
+            if _bias.shape != _shape_out:
+                raise ShapeError(
+                    f"the shape of bias {_bias.shape} does not match the shape of output {_shape_out}."
+                )
         else:
             _bias = int(bias)
 
         self.bias = _bias
 
         super().__init__(
-            neuron_s,
-            shape_out=as_shape(out_features),
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
+            neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
         )
 
 
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 8d026ce6..69cb907c 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -64,9 +64,9 @@
     "SpikingSub",
     "Transpose2d",
     "Transpose3d",
-    "Conv2dSemiFolded",
-    "Filter",
     "Linear",
+    "LinearSemiFolded",
+    "Conv2dSemiFolded",
     "MaxPool2dSemiFolded",
     "AvgPool2dSemiFolded",
 ]
@@ -861,6 +861,44 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         return generated
 
 
+@set_rt_mode_ann()
+class Linear(_LinearBase):
+    "Linear layer for ANN."
+
+    inherent_delay = 0
+
+    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
+        output = x1 @ self.weights.astype(VOLTAGE_DTYPE)
+        output = output + self.bias
+        output = np.where(output >= 1, MetaNeuron._truncate(output, self.bit_trunc), 0)
+
+        return output.astype(NEUOUT_U8_DTYPE)
+
+    def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
+        neuron_d = ANNNeuron(
+            self.shape_out,
+            self.bias,
+            self.bit_trunc,
+            delay=self.delay_relative,
+            tick_wait_start=self.tick_wait_start,
+            tick_wait_end=self.tick_wait_end,
+            keep_shape=self.keep_shape,
+            name=f"nd_{self.name}",
+        )
+        syn1 = FullConnSyn(
+            self.module_intf.operands[0],
+            neuron_d,
+            weights=self.weights,
+            conn_type=ConnType.All2All,
+            name=f"syn1_{self.name}",
+        )
+
+        generated = [neuron_d, syn1]
+        self._rebuild_out_intf(network, neuron_d, *generated, **build_options)
+
+        return generated
+
+
 class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
     "This operator is used on the first fully-connected layer after the semi-folded convolution."
 
@@ -940,7 +978,16 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """2d semi-folded convolution for ANN mode."""
+        """2d semi-folded convolution for ANN mode.
+
+        Args:
+            neuron_s: source neuron. The dimensions need to be expressed explicitly as (C,H,W).
+            kernel: convolution kernel in (O,I,H,W) order.
+            stride: the step size of the kernel sliding. It can be a scalar or a tuple of 2 integers.
+            padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2 integers.
+            bias: It can be a scalar or an array of the same size as the output.
+            bit_trunc: the bit truncation position. By default, bits 7 to 0 are truncated.
+        """
         if kernel.ndim != self._spatial_ndim + 2:
             raise ShapeError(
                 f"convolution kernel dimension must be {self._spatial_ndim + 2}, but got {kernel.ndim}."
@@ -951,13 +998,6 @@ def __init__(
         self.padding = _pair(padding)
         self.bit_trunc = bit_trunc
 
-        if isinstance(bias, np.ndarray):
-            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
-        else:
-            _bias = int(bias)
-
-        self.bias = _bias
-
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
         # XXX Do not consider the case when the shape of source neurons needs to be changed, for now.
@@ -969,12 +1009,21 @@ def __init__(
         if in_ch != cin:
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
+        _shape_out = (cout, out_h)
+
+        if isinstance(bias, np.ndarray):
+            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
+            if _bias.shape != _shape_out:
+                raise ShapeError(
+                    f"the shape of bias {_bias.shape} does not match the shape of output {_shape_out}."
+                )
+        else:
+            _bias = int(bias)
+
+        self.bias = _bias
+
         super().__init__(
-            neuron_s,
-            shape_out=(cout, out_h),
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
+            neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
         )
 
     def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
@@ -1057,10 +1106,9 @@ def build(
             )
             s_kernel.append(syn2)
 
-        # Extra negative padding layer
+        # Add an additional negative padding layer to eliminate the incorrect output
         # NOTE: ts_first_valid_inp = 0 & padding[0] > 0 means the previous layer is
         # an input node. No need to add negative padding layer for this case.
-        # TODO add technical details
         if ts_first_valid_inp > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
@@ -1106,114 +1154,6 @@ def build(
         return generated
 
 
-@deprecated(
-    "The backend currently does not support 'Filter', please use it in a future version",
-    category=PAIBoxDeprecationWarning,
-)
-@set_rt_mode_ann()
-class Filter(FunctionalModule):
-    def __init__(
-        self,
-        neuron: Union[NeuDyn, InputProj],
-        time_to_fire: int,
-        keep_shape: bool = False,
-        name: Optional[str] = None,
-        **kwargs,
-    ) -> None:
-        """ """
-        shape_out = neuron.shape_out
-        self.time_to_fire = time_to_fire
-        self.cur_time = 0
-        super().__init__(
-            neuron,
-            shape_out=shape_out,
-            keep_shape=keep_shape,
-            name=name,
-            **kwargs,
-        )
-
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        if self.cur_time != self.time_to_fire:
-            self.cur_time += 1
-            return np.zeros_like(x1)
-        else:
-            self.cur_time = 0
-            return x1
-
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        inp1 = Always1Neuron((2,))
-        n1_filter = Neuron(
-            self.shape_out,
-            leak_v=0,
-            neg_threshold=0,
-            delay=self.delay_relative,
-            tick_wait_start=self.tick_wait_start,
-            tick_wait_end=self.tick_wait_end,
-            input_width=self.input_width,
-            spike_width=self.spike_width,
-            snn_en=self.snn_en,
-            keep_shape=self.keep_shape,
-            name="filter",
-        )
-
-        syn1 = FullConnSyn(
-            self.module_intf.operands[0],  # (10,0)
-            n1_filter,  # (10,0)
-            weights=1,
-            conn_type=ConnType.One2One,
-            name=f"s0_{self.name}",
-        )
-        syn2 = FullConnSyn(
-            inp1,  # (2,0)
-            n1_filter,  # (10,0)
-            weights=-128,
-            conn_type=ConnType.All2All,
-            name=f"s1_{self.name}",
-        )
-        network._add_components(n1_filter, syn1, syn2)
-        network._remove_components(self)
-        generated = [n1_filter, syn1, syn2]
-        return generated
-
-
-@set_rt_mode_ann()
-class Linear(_LinearBase):
-    "Linear layer for ANN."
-
-    inherent_delay = 0
-
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        output = x1 @ self.weights.astype(VOLTAGE_DTYPE)
-        output = output + self.bias
-        output = np.where(output >= 1, MetaNeuron._truncate(output, self.bit_trunc), 0)
-
-        return output.astype(NEUOUT_U8_DTYPE)
-
-    def build(self, network: DynSysGroup, **build_options) -> BuiltComponentType:
-        neuron_d = ANNNeuron(
-            self.shape_out,
-            self.bias,
-            self.bit_trunc,
-            delay=self.delay_relative,
-            tick_wait_start=self.tick_wait_start,
-            tick_wait_end=self.tick_wait_end,
-            keep_shape=self.keep_shape,
-            name=f"nd_{self.name}",
-        )
-        syn1 = FullConnSyn(
-            self.module_intf.operands[0],
-            neuron_d,
-            weights=self.weights,
-            conn_type=ConnType.All2All,
-            name=f"syn1_{self.name}",
-        )
-
-        generated = [neuron_d, syn1]
-        self._rebuild_out_intf(network, neuron_d, *generated, **build_options)
-
-        return generated
-
-
 class MaxPool2dSemiFolded(_SemiFoldedModule):
     _spatial_ndim: ClassVar[int] = 2
 
@@ -1226,7 +1166,16 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """2d semi-folded max pooling for ANN mode."""
+        """2d semi-folded max pooling for ANN mode.
+
+        Args:
+            neuron_s: the input neuron to be pooled.
+            kernel_size: the size of the window to take a max over.
+            stride: the stride of the window. Default value is `kernel_size`.
+
+        NOTE: Since the semi-folded max pooling in the ANN mode is implemented using comparators, it is not \
+            possible to use negative padding layer to eliminate the incorrect results of the padding part.
+        """
         self.kernel_size = _pair(kernel_size)
         if stride is None:
             _stride = self.kernel_size
@@ -1345,7 +1294,15 @@ def __init__(
         name: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """2d AvgPool2d_semimap for spike."""
+        """2d semi-folded average pooling for ANN mode.
+
+        Args:
+            neuron_s: the input neuron to be pooled.
+            kernel_size: the size of the window.
+            stride: the stride of the window. Default value is `kernel_size`.
+            padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2    \
+                integers.
+        """
         self.kernel_size = _pair(kernel_size)
         if stride is None:
             _stride = self.kernel_size
@@ -1397,8 +1354,17 @@ def build(
         if build_options.get("check_before_compile"):
             self._input_buffer_len_check(cin, in_h, kw, valid_interval)
 
-        # NOTE: Division is achieved with the help of truncation operation.
-        # It can only be approximated to a power of an integer of 2.
+        # NOTE: Division is achieved with the help of output truncation.
+        # TODO Since division with a divisor that is an integer power of 2 can only be implemented by
+        # truncating the output, when the pooling window is not an integer power of 2 (which is the
+        # usual case), additional processing is required before instantiating these operators.
+        # For example,
+        # 1. The pooling window size is 3x3, but the chip can only accurately implement result/8.
+        # 2. bit_trunc=8 for the output neurons of this pooling layer, but for the next layer, the
+        # weights become w*8/9, where w is the original weights.
+        # 3. The alternative is bit_trunc=16 for this layer & w*16/9 for the next layer?
+        # NOTE: The resulting linear transformation of weights of the next layer needs to be considered
+        # during quantization.
         bit_trunc = 8 + (kh * kw).bit_length() - 1
 
         n_delays = NodeList()
@@ -1445,7 +1411,7 @@ def build(
             )
             s_delays.append(syn2)
 
-        # Extra negative padding layer
+        # Add additional negative padding layer to eliminate the incorrect output
         if ts_first_valid_inp > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index c2fc4bcb..85e3df47 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -255,7 +255,6 @@ def __init__(
             - name: name of the neuron. Optional.
 
         NOTE: positive threshold = 1, negative threshold = 0, reset_v = 0, and leak_v = 0.
-
         """
         super().__init__(
             shape, neg_threshold=0, keep_shape=keep_shape, name=name, **kwargs
@@ -263,8 +262,8 @@ def __init__(
 
 
 @deprecated(
-    "'SpikingRelu' is deprecated in version 1.2.0 and   \
-        will be removed in version 1.3.0. Use 'BypassNeuron' instead.",
+    "'SpikingRelu' is deprecated in version 1.2.0 and will "
+    "be removed in version 1.3.0. Use 'BypassNeuron' instead.",
     category=PAIBoxDeprecationWarning,
 )
 class SpikingRelu(BypassNeuron):
diff --git a/paibox/exceptions.py b/paibox/exceptions.py
index 117d0c33..55514a43 100644
--- a/paibox/exceptions.py
+++ b/paibox/exceptions.py
@@ -14,7 +14,7 @@ class PAIBoxWarning(UserWarning):
 
 
 class PAIBoxDeprecationWarning(PAIBoxWarning, DeprecationWarning):
-    """Warning class for features which will be deprecatedin a future version."""
+    """Warning class for deprecated features."""
 
     pass
 
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index f75a7686..6a0d5d8e 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -277,7 +277,6 @@ def __init__(self, shape, kernels, strides, paddings, out_features, weight):
             out_features,
             weight,
             bias=0,
-            conn_type=pb.SynConnType.All2All,
             tick_wait_start=self.conv_list[-1].tick_wait_start + 2,
         )
 
@@ -319,7 +318,6 @@ def __init__(
             out_features,
             weights=weight,
             bias=0,
-            conn_type=pb.SynConnType.All2All,
             tick_wait_start=self.pool_list[-1].tick_wait_start + 2,
         )
 

From 1b9c97021fc33ce75d55196bea4a56a5a08deeff Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 5 Nov 2024 11:12:47 +0800
Subject: [PATCH 125/187] =?UTF-8?q?=E2=9C=85=20test(graph):=20add=20test?=
 =?UTF-8?q?=20for=20`=5F=5Fgh=5Fbuild=5Fignore=5F=5F`=20flag=20in=20pre-bu?=
 =?UTF-8?q?ild=20phase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_graphs.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/backend/test_graphs.py b/tests/backend/test_graphs.py
index f6cdb2ef..6d3de2f0 100644
--- a/tests/backend/test_graphs.py
+++ b/tests/backend/test_graphs.py
@@ -242,14 +242,21 @@ def test_prebuild_gh_build_ignore(
         self, monkeypatch, build_FModule_ConnWithInput_Net
     ):
         net = build_FModule_ConnWithInput_Net
+        mapper = pb.Mapper()
 
         monkeypatch.setattr(net.n1, "__gh_build_ignore__", True)
 
-        mapper = pb.Mapper()
-
         with pytest.raises(GraphConnectionError):
             mapper.build(net)
 
+        monkeypatch.setattr(net.n1, "__gh_build_ignore__", False)
+        monkeypatch.setattr(net.s2, "__gh_build_ignore__", True)
+        monkeypatch.setattr(net.n2, "__gh_build_ignore__", True)
+
+        mapper.build(net)
+        assert net.s2.name not in mapper.graph._raw_edges
+        assert net.n2.name not in mapper.graph._raw_nodes
+
     @pytest.mark.parametrize("no_twisted_branch", [True, False])
     def test_untwist_branch_nodes1(
         self, ensure_dump_dir, build_Network_branch_nodes, no_twisted_branch

From 0bbd9b1f9f33700d02e747533270da97a91eb75d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 5 Nov 2024 12:54:04 +0000
Subject: [PATCH 126/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/__init__.py              | 18 ++++++++----------
 paibox/backend/placement.py     |  1 -
 paibox/components/_modules.py   |  2 +-
 paibox/components/functional.py |  2 +-
 paibox/network.py               |  2 +-
 tests/backend/test_placement.py |  2 +-
 6 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/paibox/__init__.py b/paibox/__init__.py
index 50a890d7..928c30af 100644
--- a/paibox/__init__.py
+++ b/paibox/__init__.py
@@ -4,18 +4,17 @@
 from .backend import BACKEND_CONFIG as BACKEND_CONFIG
 from .backend import Mapper as Mapper
 
+# Functional modules in SNN mode only
 # Functional modules in ANN mode only
 from .components.functional import AvgPool2dSemiFolded as AvgPool2dSemiFolded
-from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
-from .components.functional import Linear as Linear
-from .components.functional import LinearSemiFolded as LinearSemiFolded
-from .components.functional import MaxPool2dSemiFolded as MaxPool2dSemiFolded
-
-# Functional modules in SNN mode only
 from .components.functional import BitwiseAND as BitwiseAND
 from .components.functional import BitwiseNOT as BitwiseNOT
 from .components.functional import BitwiseOR as BitwiseOR
 from .components.functional import BitwiseXOR as BitwiseXOR
+from .components.functional import Conv2dSemiFolded as Conv2dSemiFolded
+from .components.functional import Linear as Linear
+from .components.functional import LinearSemiFolded as LinearSemiFolded
+from .components.functional import MaxPool2dSemiFolded as MaxPool2dSemiFolded
 from .components.functional import SpikingAdd as SpikingAdd
 from .components.functional import SpikingAvgPool1d as SpikingAvgPool1d
 from .components.functional import SpikingAvgPool1dWithV as SpikingAvgPool1dWithV
@@ -27,18 +26,17 @@
 from .components.functional import Transpose2d as Transpose2d
 from .components.functional import Transpose3d as Transpose3d
 
+# Recued neurons in ANN mode only
 # Reduced neurons
 from .components.neuron.neurons import IF as IF
 from .components.neuron.neurons import LIF as LIF
+from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
+from .components.neuron.neurons import ANNNeuron as ANNNeuron
 from .components.neuron.neurons import BypassNeuron as BypassNeuron
 from .components.neuron.neurons import PhasicSpiking as PhasicSpiking
 from .components.neuron.neurons import SpikingRelu as SpikingRelu
 from .components.neuron.neurons import TonicSpiking as TonicSpiking
 
-# Recued neurons in ANN mode only
-from .components.neuron.neurons import ANNBypassNeuron as ANNBypassNeuron
-from .components.neuron.neurons import ANNNeuron as ANNNeuron
-
 # Input projection
 from .components.projection import InputProj as InputProj
 
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index c498ed81..364a397d 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -42,7 +42,6 @@
     is_iw8,
 )
 
-
 # Get the fan-out by the combination rate of dendrites
 if hasattr(HwConfig, "FANOUT_IW8"):
     FANOUT_IW8 = HwConfig.FANOUT_IW8
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index e0526c27..4ec40ed1 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,6 +1,6 @@
 import math
-from typing import Literal, Optional, Protocol, Union
 import typing
+from typing import Literal, Optional, Protocol, Union
 
 import numpy as np
 from paicorelib import TM, HwConfig
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 69cb907c..9438d839 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1,8 +1,8 @@
 import sys
+import typing
 from collections.abc import Sequence
 from functools import partial
 from typing import ClassVar, Optional, Union
-import typing
 
 import numpy as np
 from paicorelib import NTM, RM, TM
diff --git a/paibox/network.py b/paibox/network.py
index a6057ade..e23defd1 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -6,13 +6,13 @@
 from .base import DynamicSys, SynSys
 from .collector import Collector
 from .components import NeuModule, Neuron, Projection
-from .components.modules import BuiltComponentType
 from .components.functional import (
     AvgPool2dSemiFolded,
     Conv2dSemiFolded,
     LinearSemiFolded,
     MaxPool2dSemiFolded,
 )
+from .components.modules import BuiltComponentType
 from .mixin import Container
 from .node import NodeDict, NodeList
 
diff --git a/tests/backend/test_placement.py b/tests/backend/test_placement.py
index 5417b59d..44386b5e 100644
--- a/tests/backend/test_placement.py
+++ b/tests/backend/test_placement.py
@@ -12,7 +12,7 @@
 from paicorelib.framelib import OfflineFrameGen
 
 import paibox as pb
-from paibox.backend.placement import CorePlacement, FANOUT_IW8
+from paibox.backend.placement import FANOUT_IW8, CorePlacement
 from paibox.backend.types import (
     WRAM_PACKED_DTYPE,
     WRAM_UNPACKED_DTYPE,

From 0bc2e25c563261f48c61cda0a407e20a0c327a3a Mon Sep 17 00:00:00 2001
From: birdswimming <birdswimming3.14@gmail.com>
Date: Sat, 9 Nov 2024 13:27:10 +0800
Subject: [PATCH 127/187] fix bug about online core

---
 paibox/backend/routing.py    | 24 ++++++++++++++----------
 tests/backend/test_mapper.py |  6 ++++--
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index e98189d4..5b5219d9 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -5,6 +5,7 @@
 from typing import Any, ClassVar, Union
 
 from paicorelib import ROUTING_DIRECTIONS_IDX as DIREC_IDX
+from paicorelib import ONLINE_CORES_BASE_COORD
 from paicorelib import ChipCoord, Coord, HwConfig, RoutingCoord
 from paicorelib import RoutingDirection as Direction
 from paicorelib import RoutingLevel as Level
@@ -286,14 +287,16 @@ def get_insert_location(
         """Look for the insertion location of the incoming routing group."""
         n_core_aligned = _nearest_multiple_above(self.n_core_total, n_core_incoming)
 
-        n_core_predicted = n_core_aligned + n_core_incoming
-        n_core_inchip = _num_inchip(n_core_predicted)
-
-        # If online cores are hit, start from the next chip
-        if n_core_inchip - n_core_wasted > HwConfig.N_CORE_OFFLINE:
-            n_core_aligned = _nearest_multiple_above(
-                n_core_aligned, HwConfig.N_CORE_MAX_INCHIP
-            )
+        n_core_predicted = n_core_aligned + n_core_incoming 
+        start_core_inchip = _num_inchip(n_core_aligned)
+        end_core_inchip = _num_inchip(n_core_predicted) - n_core_wasted
+        
+        # If online cores are hit, start from the first core after the online cores
+        if start_core_inchip <= ONLINE_CORES_BASE_COORD and end_core_inchip > ONLINE_CORES_BASE_COORD:
+            online_end_inchip = ONLINE_CORES_BASE_COORD + HwConfig.N_CORE_ONLINE
+            # The first core after the online cores
+            online_end = n_core_aligned - start_core_inchip + online_end_inchip
+            n_core_aligned = _nearest_multiple_above(online_end, n_core_incoming)
 
         core_loc = n_core_aligned
 
@@ -332,10 +335,11 @@ def place_routing_group(
         n_core_req = n_core_cost - n_tail_waste
 
         # Check whether a single routing group can be placed within a single core.
-        if n_core_req > HwConfig.N_CORE_OFFLINE:
+        # The largest contiguous run of offline cores is ONLINE_CORES_BASE_COORD long.
+        if n_core_req > ONLINE_CORES_BASE_COORD:
             raise ResourceError(
                 "the number of cores required by the routing group exceeds the hardware limit, "
-                f"{n_core_req} > {HwConfig.N_CORE_OFFLINE}."
+                f"{n_core_req} > {ONLINE_CORES_BASE_COORD}."
             )
 
         core_insert_loc, chip_idx_loc, rpath_start = self.get_insert_location(
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 0c8958f9..2d224241 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pytest
-from paicorelib import Coord, HwConfig
+from paicorelib import Coord, HwConfig, ONLINE_CORES_BASE_COORD
 from paicorelib import WeightWidth as WW
 
 import paibox as pb
@@ -262,8 +262,10 @@ def __init__(self):
         if n_networks > 1008:
             r2 = mapper.routing_manager.n_core_per_chip[1]
             assert rtotal == r1 + r2
-            assert r1 == 1008
+            assert r1 == 1024
             assert r2 == n_networks - 1008
+        elif n_networks > ONLINE_CORES_BASE_COORD:
+            assert rtotal == r1 == n_networks + 16
         else:
             assert rtotal == r1 == n_networks
 

From d8516a7b1e464e7ff36d5e3c7ec85d7931c9f408 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Nov 2024 11:04:56 +0000
Subject: [PATCH 128/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/backend/routing.py    | 11 +++++++----
 tests/backend/test_mapper.py |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 5b5219d9..4863b112 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -4,8 +4,8 @@
 from collections.abc import Generator, Iterator
 from typing import Any, ClassVar, Union
 
-from paicorelib import ROUTING_DIRECTIONS_IDX as DIREC_IDX
 from paicorelib import ONLINE_CORES_BASE_COORD
+from paicorelib import ROUTING_DIRECTIONS_IDX as DIREC_IDX
 from paicorelib import ChipCoord, Coord, HwConfig, RoutingCoord
 from paicorelib import RoutingDirection as Direction
 from paicorelib import RoutingLevel as Level
@@ -287,12 +287,15 @@ def get_insert_location(
         """Look for the insertion location of the incoming routing group."""
         n_core_aligned = _nearest_multiple_above(self.n_core_total, n_core_incoming)
 
-        n_core_predicted = n_core_aligned + n_core_incoming 
+        n_core_predicted = n_core_aligned + n_core_incoming
         start_core_inchip = _num_inchip(n_core_aligned)
         end_core_inchip = _num_inchip(n_core_predicted) - n_core_wasted
-        
+
         # If online cores are hit, start from the first core after the online cores
-        if start_core_inchip <= ONLINE_CORES_BASE_COORD and end_core_inchip > ONLINE_CORES_BASE_COORD:
+        if (
+            start_core_inchip <= ONLINE_CORES_BASE_COORD
+            and end_core_inchip > ONLINE_CORES_BASE_COORD
+        ):
             online_end_inchip = ONLINE_CORES_BASE_COORD + HwConfig.N_CORE_ONLINE
             # The first core after the online cores
             online_end = n_core_aligned - start_core_inchip + online_end_inchip
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 2d224241..9bd0022b 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pytest
-from paicorelib import Coord, HwConfig, ONLINE_CORES_BASE_COORD
+from paicorelib import ONLINE_CORES_BASE_COORD, Coord, HwConfig
 from paicorelib import WeightWidth as WW
 
 import paibox as pb

From 451aa0f6c9dd147a0a8c5e2228e06ed391537b9b Mon Sep 17 00:00:00 2001
From: birdswimming <birdswimming3.14@gmail.com>
Date: Wed, 13 Nov 2024 20:05:59 +0800
Subject: [PATCH 129/187] make tick relative in range

---
 paibox/backend/segment_utils.py | 8 ++++----
 paibox/backend/types.py         | 6 +++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/paibox/backend/segment_utils.py b/paibox/backend/segment_utils.py
index 375e4bc0..412147c8 100644
--- a/paibox/backend/segment_utils.py
+++ b/paibox/backend/segment_utils.py
@@ -334,13 +334,13 @@ def aligned_coords(
 
     if tr_offset_stop == tr_offset_start:
         axon_coords = [
-            AxonCoord(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
+            AxonCoord.build(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
             for addr in range(addr_start, addr_stop)
         ]
     else:
         # First row: addr_start -> end
         acoords_first = [
-            AxonCoord(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
+            AxonCoord.build(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
             for addr in range(addr_start, addr_width)
         ]
 
@@ -348,13 +348,13 @@ def aligned_coords(
         acoords_mid = []
         for tr in range(tr_offset_start + 1, tr_offset_stop):
             acoords_mid.extend(
-                AxonCoord(tr_base + tr, (addr_offset + addr) * _addr_interval)
+                AxonCoord.build(tr_base + tr, (addr_offset + addr) * _addr_interval)
                 for addr in range(addr_width)
             )
 
         # Last row: start -> addr_stop
         acoords_last = [
-            AxonCoord(tr_base + tr_offset_stop, (addr_offset + addr) * _addr_interval)
+            AxonCoord.build(tr_base + tr_offset_stop, (addr_offset + addr) * _addr_interval)
             for addr in range(addr_stop)
         ]
 
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index ee76e0b1..7e6af910 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -13,7 +13,7 @@
 else:
     from typing_extensions import TypeAlias
 
-from paicorelib import Coord, CoreMode
+from paicorelib import Coord, CoreMode, HwConfig
 from paicorelib import ReplicationId as RId
 
 from paibox.base import PAIBoxObject
@@ -242,6 +242,10 @@ class AxonCoord(NamedTuple):
     tick_relative: int
     addr_axon: int
 
+    @classmethod
+    def build(cls, tick_relative: int, addr_axon: int) -> "AxonCoord":
+        tick_relative = tick_relative % HwConfig.N_TIMESLOT_MAX
+        return cls(tick_relative, addr_axon)
 
 class AxonSegment(NamedTuple):
     n_axon: int

From b018008548ab391a648c3d54bc8f0dd9fd48d2a8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Nov 2024 12:13:02 +0000
Subject: [PATCH 130/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/backend/segment_utils.py | 12 +++++++++---
 paibox/backend/types.py         |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/paibox/backend/segment_utils.py b/paibox/backend/segment_utils.py
index 412147c8..086a5c6a 100644
--- a/paibox/backend/segment_utils.py
+++ b/paibox/backend/segment_utils.py
@@ -334,13 +334,17 @@ def aligned_coords(
 
     if tr_offset_stop == tr_offset_start:
         axon_coords = [
-            AxonCoord.build(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
+            AxonCoord.build(
+                tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval
+            )
             for addr in range(addr_start, addr_stop)
         ]
     else:
         # First row: addr_start -> end
         acoords_first = [
-            AxonCoord.build(tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval)
+            AxonCoord.build(
+                tr_base + tr_offset_start, (addr_offset + addr) * _addr_interval
+            )
             for addr in range(addr_start, addr_width)
         ]
 
@@ -354,7 +358,9 @@ def aligned_coords(
 
         # Last row: start -> addr_stop
         acoords_last = [
-            AxonCoord.build(tr_base + tr_offset_stop, (addr_offset + addr) * _addr_interval)
+            AxonCoord.build(
+                tr_base + tr_offset_stop, (addr_offset + addr) * _addr_interval
+            )
             for addr in range(addr_stop)
         ]
 
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 7e6af910..dacf9dcd 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -247,6 +247,7 @@ def build(cls, tick_relative: int, addr_axon: int) -> "AxonCoord":
         tick_relative = tick_relative % HwConfig.N_TIMESLOT_MAX
         return cls(tick_relative, addr_axon)
 
+
 class AxonSegment(NamedTuple):
     n_axon: int
     """#N of axons."""

From 5f4d7a030a204129e3efc1d0c2a93799c92dd2ea Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 20 Nov 2024 15:58:11 +0800
Subject: [PATCH 131/187] =?UTF-8?q?=F0=9F=93=9D=20add=20supporting=20ops?=
 =?UTF-8?q?=20doc?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md           |  8 +++---
 docs/Support-Ops.md | 59 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 docs/Support-Ops.md

diff --git a/README.md b/README.md
index acf88399..d4b2dbfa 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,10 @@
     </a>
 </p>
 
-👉 用户使用指南：[Guide-of-PAIBox](docs/Guide-of-PAIBox.md)
+👉 [用户使用指南](docs/Guide-of-PAIBox.md)
 
-高效编写测试项目指南：[Guide-of-Test](docs/Guide-of-Test.md)
+👉 [支持算子](docs/Support-Ops.md)
 
-[Changelog](./CHANGELOG.md)
+👉 [高效编写测试项目指南](docs/Guide-of-Test.md)
+
+👉 [Changelog](./CHANGELOG.md)
diff --git a/docs/Support-Ops.md b/docs/Support-Ops.md
new file mode 100644
index 00000000..c1b16d62
--- /dev/null
+++ b/docs/Support-Ops.md
@@ -0,0 +1,59 @@
+# 算子支持
+
+## 神经元
+
+### 配置项
+
+芯片所支持的神经元配置项如下表所列：
+
+|         支持功能         | 可写 |         取值         | 功能描述                                                                                                                                                                         |
+| :-----------------------: | :--: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+|         复位模式         |  ✅  | 硬复位/软复位/不复位 | 硬复位，膜电平重置为正/负阈值<br />软复位，膜电平将减正阈值/加负阈值（若负阈值模式为复位模式）<br />不复位，膜电平保持不变                                                       |
+|         复位电平         |  ✅  |    30比特有符号数    | 可配置复位电平                                                                                                                                                                   |
+|       比较前后泄露       |  ✅  |        前/后        | 阈值比较发生在泄露前/后                                                                                                                                                          |
+|          正阈值          |  ✅  |    29比特无符号数    | 可配置正阈值                                                                                                                                                                     |
+|          负阈值          |  ✅  |    29比特无符号数    | 可配置负阈值                                                                                                                                                                     |
+|         泄露电平         |  ✅  |    30比特有符号数    | 可配置泄露幅值                                                                                                                                                                   |
+|         反向泄露         |  ✅  |      开启/关闭      | 若开启，泄露与当前膜电平符号相关：<br />当泄露值为正，膜电平向0收敛<br />当泄露值为负，膜电平偏离0发散                                                                           |
+|        负阈值模式        |  ✅  |      复位/饱和      | 当膜电平低于负阈值时：<br />为复位模式，根据复位模式复位<br />为饱和模式，膜电平重置为负阈值                                                                                     |
+| 膜电平截取位（仅ANN模式） |  ✅  |        [0,29]        | 输出膜电平的截取位置T，30比特有符号膜电平需截取8比特作为输出：<br />T<8，截取[T-1:0]，低位补0 <br />T=8，截取[7:0]<br />T≤29，截取[T-1:T-8]<br />膜电平大于窗口最高位则截断处理 |
+|       随机轴突整合       |  ✅  |      开启/关闭      | 若开启，神经元根据硬件生成的随机数\*过滤一些轴突上的输入，进行选择性累加                                                                                                         |
+|         随机泄露         |  ✅  |      开启/关闭      | 若开启，如果泄露幅值小于硬件生成的随机数\*，则此次泄露为0                                                                                                                        |
+|         阈值掩码         |  ✅  |        [0,29]        | 若开启，硬件生成的随机数\*将和它求与后得到一个0\~29比特随机阈值，并加至神经元的正、负阈值上                                                                                      |
+|          膜电平          |  ❌  |          0          | 只读寄存器，初始值为0                                                                                                                                                            |
+
+\*硬件生成的随机数均为无符号数。
+
+## 突触
+
+芯片不支持Alpha、AMPA、GABA等类型突触。
+
+## 算子
+
+包括突触与突触+神经元组合形式的算子。
+
+|         算子类型         | ANN | SNN |     备注     |
+| :----------------------: | :-: | :-: | :----------: |
+|          全连接          | ✅ | ✅ |              |
+|        2D矩阵乘法        | ✅ | ✅ |              |
+|          1D卷积          | ✅ | ✅ |  全展开形式  |
+|          2D卷积          | ✅ | ✅ |  全展开形式  |
+|        1D转置卷积        | ✅ | ✅ |  全展开形式  |
+|        2D转置卷积        | ✅ | ✅ |  全展开形式  |
+|           位与           | ❌ | ✅ |              |
+|           位或           | ❌ | ✅ |              |
+|           位非           | ❌ | ✅ |              |
+|          位异或          | ❌ | ✅ |              |
+|        1D平均池化        | ❌ | ✅ |    脉冲化    |
+| 1D平均池化（膜电位相关） | ❌ | ✅ |    脉冲化    |
+|        1D最大池化        | ❌ | ✅ |    脉冲化    |
+|        2D平均池化        | ❌ | ✅ |    脉冲化    |
+| 2D平均池化（膜电位相关） | ❌ | ✅ |    脉冲化    |
+|        2D最大池化        | ❌ | ✅ |    脉冲化    |
+|          脉冲加          | ❌ | ✅ | 针对脉冲序列 |
+|          脉冲减          | ❌ | ✅ | 针对脉冲序列 |
+|          线性层          | ✅ | ❌ |              |
+|          2D卷积          | ✅ | ❌ |  半折叠形式  |
+|        2D最大池化        | ✅ | ❌ |  半折叠形式  |
+|        2D平均池化        | ✅ | ❌ |  半折叠形式  |
+|          线性层          | ✅ | ❌ |  半折叠形式  |

From 397264f84276e5c87e3120bf8919cfe64080d9d4 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 20 Nov 2024 16:03:39 +0800
Subject: [PATCH 132/187] =?UTF-8?q?=F0=9F=94=A8=20use=20`UserDict`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/context.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/paibox/context.py b/paibox/context.py
index 6a712b1e..f645da4a 100644
--- a/paibox/context.py
+++ b/paibox/context.py
@@ -1,3 +1,4 @@
+from collections import UserDict
 from typing import Any, TypeVar
 
 __all__ = ["FRONTEND_ENV"]
@@ -6,10 +7,8 @@
 _KT = TypeVar("_KT")
 _VT = TypeVar("_VT")
 
-# XXX: use collections.UserDict[_KT, _VT] in 3.9+
 
-
-class _Context(dict[_KT, _VT]):
+class _Context(UserDict[_KT, _VT]):
     def load(self, key: Any, default: Any = None) -> Any:
         """Load the context by the `key`.
 

From 1861b2ed0141a334f9386a73ae565989c1416435 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 20 Nov 2024 16:05:34 +0800
Subject: [PATCH 133/187] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20`paicorelib`=20bum?=
 =?UTF-8?q?p=20to=20`>=3D1.3.1`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 poetry.lock    | 332 +++++++++++++++++++++++++------------------------
 pyproject.toml |   2 +-
 2 files changed, 171 insertions(+), 163 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 4645ad5b..f87c950d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "annotated-types"
@@ -119,62 +119,69 @@ reference = "tsinghua"
 
 [[package]]
 name = "orjson"
-version = "3.10.6"
+version = "3.10.11"
 description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "orjson-3.10.6-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:fb0ee33124db6eaa517d00890fc1a55c3bfe1cf78ba4a8899d71a06f2d6ff5c7"},
-    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1c4b53b24a4c06547ce43e5fee6ec4e0d8fe2d597f4647fc033fd205707365"},
-    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eadc8fd310edb4bdbd333374f2c8fec6794bbbae99b592f448d8214a5e4050c0"},
-    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61272a5aec2b2661f4fa2b37c907ce9701e821b2c1285d5c3ab0207ebd358d38"},
-    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57985ee7e91d6214c837936dc1608f40f330a6b88bb13f5a57ce5257807da143"},
-    {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:633a3b31d9d7c9f02d49c4ab4d0a86065c4a6f6adc297d63d272e043472acab5"},
-    {file = "orjson-3.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1c680b269d33ec444afe2bdc647c9eb73166fa47a16d9a75ee56a374f4a45f43"},
-    {file = "orjson-3.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f759503a97a6ace19e55461395ab0d618b5a117e8d0fbb20e70cfd68a47327f2"},
-    {file = "orjson-3.10.6-cp310-none-win32.whl", hash = "sha256:95a0cce17f969fb5391762e5719575217bd10ac5a189d1979442ee54456393f3"},
-    {file = "orjson-3.10.6-cp310-none-win_amd64.whl", hash = "sha256:df25d9271270ba2133cc88ee83c318372bdc0f2cd6f32e7a450809a111efc45c"},
-    {file = "orjson-3.10.6-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b1ec490e10d2a77c345def52599311849fc063ae0e67cf4f84528073152bb2ba"},
-    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d43d3feb8f19d07e9f01e5b9be4f28801cf7c60d0fa0d279951b18fae1932b"},
-    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac3045267e98fe749408eee1593a142e02357c5c99be0802185ef2170086a863"},
-    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c27bc6a28ae95923350ab382c57113abd38f3928af3c80be6f2ba7eb8d8db0b0"},
-    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d27456491ca79532d11e507cadca37fb8c9324a3976294f68fb1eff2dc6ced5a"},
-    {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05ac3d3916023745aa3b3b388e91b9166be1ca02b7c7e41045da6d12985685f0"},
-    {file = "orjson-3.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1335d4ef59ab85cab66fe73fd7a4e881c298ee7f63ede918b7faa1b27cbe5212"},
-    {file = "orjson-3.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4bbc6d0af24c1575edc79994c20e1b29e6fb3c6a570371306db0993ecf144dc5"},
-    {file = "orjson-3.10.6-cp311-none-win32.whl", hash = "sha256:450e39ab1f7694465060a0550b3f6d328d20297bf2e06aa947b97c21e5241fbd"},
-    {file = "orjson-3.10.6-cp311-none-win_amd64.whl", hash = "sha256:227df19441372610b20e05bdb906e1742ec2ad7a66ac8350dcfd29a63014a83b"},
-    {file = "orjson-3.10.6-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ea2977b21f8d5d9b758bb3f344a75e55ca78e3ff85595d248eee813ae23ecdfb"},
-    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6f3d167d13a16ed263b52dbfedff52c962bfd3d270b46b7518365bcc2121eed"},
-    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f710f346e4c44a4e8bdf23daa974faede58f83334289df80bc9cd12fe82573c7"},
-    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7275664f84e027dcb1ad5200b8b18373e9c669b2a9ec33d410c40f5ccf4b257e"},
-    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0943e4c701196b23c240b3d10ed8ecd674f03089198cf503105b474a4f77f21f"},
-    {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:446dee5a491b5bc7d8f825d80d9637e7af43f86a331207b9c9610e2f93fee22a"},
-    {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:64c81456d2a050d380786413786b057983892db105516639cb5d3ee3c7fd5148"},
-    {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
-    {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
-    {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
-    {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
-    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
-    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
-    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2c116072a8533f2fec435fde4d134610f806bdac20188c7bd2081f3e9e0133f"},
-    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6eeb13218c8cf34c61912e9df2de2853f1d009de0e46ea09ccdf3d757896af0a"},
-    {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:965a916373382674e323c957d560b953d81d7a8603fbeee26f7b8248638bd48b"},
-    {file = "orjson-3.10.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03c95484d53ed8e479cade8628c9cea00fd9d67f5554764a1110e0d5aa2de96e"},
-    {file = "orjson-3.10.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:e060748a04cccf1e0a6f2358dffea9c080b849a4a68c28b1b907f272b5127e9b"},
-    {file = "orjson-3.10.6-cp38-none-win32.whl", hash = "sha256:738dbe3ef909c4b019d69afc19caf6b5ed0e2f1c786b5d6215fbb7539246e4c6"},
-    {file = "orjson-3.10.6-cp38-none-win_amd64.whl", hash = "sha256:d40f839dddf6a7d77114fe6b8a70218556408c71d4d6e29413bb5f150a692ff7"},
-    {file = "orjson-3.10.6-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:697a35a083c4f834807a6232b3e62c8b280f7a44ad0b759fd4dce748951e70db"},
-    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd502f96bf5ea9a61cbc0b2b5900d0dd68aa0da197179042bdd2be67e51a1e4b"},
-    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f215789fb1667cdc874c1b8af6a84dc939fd802bf293a8334fce185c79cd359b"},
-    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2debd8ddce948a8c0938c8c93ade191d2f4ba4649a54302a7da905a81f00b56"},
-    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5410111d7b6681d4b0d65e0f58a13be588d01b473822483f77f513c7f93bd3b2"},
-    {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb1f28a137337fdc18384079fa5726810681055b32b92253fa15ae5656e1dddb"},
-    {file = "orjson-3.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bf2fbbce5fe7cd1aa177ea3eab2b8e6a6bc6e8592e4279ed3db2d62e57c0e1b2"},
-    {file = "orjson-3.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:79b9b9e33bd4c517445a62b90ca0cc279b0f1f3970655c3df9e608bc3f91741a"},
-    {file = "orjson-3.10.6-cp39-none-win32.whl", hash = "sha256:30b0a09a2014e621b1adf66a4f705f0809358350a757508ee80209b2d8dae219"},
-    {file = "orjson-3.10.6-cp39-none-win_amd64.whl", hash = "sha256:49e3bc615652617d463069f91b867a4458114c5b104e13b7ae6872e5f79d0844"},
-    {file = "orjson-3.10.6.tar.gz", hash = "sha256:e54b63d0a7c6c54a5f5f726bc93a2078111ef060fec4ecbf34c5db800ca3b3a7"},
+    {file = "orjson-3.10.11-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6dade64687f2bd7c090281652fe18f1151292d567a9302b34c2dbb92a3872f1f"},
+    {file = "orjson-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82f07c550a6ccd2b9290849b22316a609023ed851a87ea888c0456485a7d196a"},
+    {file = "orjson-3.10.11-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd9a187742d3ead9df2e49240234d728c67c356516cf4db018833a86f20ec18c"},
+    {file = "orjson-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:77b0fed6f209d76c1c39f032a70df2d7acf24b1812ca3e6078fd04e8972685a3"},
+    {file = "orjson-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63fc9d5fe1d4e8868f6aae547a7b8ba0a2e592929245fff61d633f4caccdcdd6"},
+    {file = "orjson-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65cd3e3bb4fbb4eddc3c1e8dce10dc0b73e808fcb875f9fab40c81903dd9323e"},
+    {file = "orjson-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6f67c570602300c4befbda12d153113b8974a3340fdcf3d6de095ede86c06d92"},
+    {file = "orjson-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1f39728c7f7d766f1f5a769ce4d54b5aaa4c3f92d5b84817053cc9995b977acc"},
+    {file = "orjson-3.10.11-cp310-none-win32.whl", hash = "sha256:1789d9db7968d805f3d94aae2c25d04014aae3a2fa65b1443117cd462c6da647"},
+    {file = "orjson-3.10.11-cp310-none-win_amd64.whl", hash = "sha256:5576b1e5a53a5ba8f8df81872bb0878a112b3ebb1d392155f00f54dd86c83ff6"},
+    {file = "orjson-3.10.11-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1444f9cb7c14055d595de1036f74ecd6ce15f04a715e73f33bb6326c9cef01b6"},
+    {file = "orjson-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdec57fe3b4bdebcc08a946db3365630332dbe575125ff3d80a3272ebd0ddafe"},
+    {file = "orjson-3.10.11-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4eed32f33a0ea6ef36ccc1d37f8d17f28a1d6e8eefae5928f76aff8f1df85e67"},
+    {file = "orjson-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80df27dd8697242b904f4ea54820e2d98d3f51f91e97e358fc13359721233e4b"},
+    {file = "orjson-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:705f03cee0cb797256d54de6695ef219e5bc8c8120b6654dd460848d57a9af3d"},
+    {file = "orjson-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03246774131701de8e7059b2e382597da43144a9a7400f178b2a32feafc54bd5"},
+    {file = "orjson-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8b5759063a6c940a69c728ea70d7c33583991c6982915a839c8da5f957e0103a"},
+    {file = "orjson-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:677f23e32491520eebb19c99bb34675daf5410c449c13416f7f0d93e2cf5f981"},
+    {file = "orjson-3.10.11-cp311-none-win32.whl", hash = "sha256:a11225d7b30468dcb099498296ffac36b4673a8398ca30fdaec1e6c20df6aa55"},
+    {file = "orjson-3.10.11-cp311-none-win_amd64.whl", hash = "sha256:df8c677df2f9f385fcc85ab859704045fa88d4668bc9991a527c86e710392bec"},
+    {file = "orjson-3.10.11-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:360a4e2c0943da7c21505e47cf6bd725588962ff1d739b99b14e2f7f3545ba51"},
+    {file = "orjson-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:496e2cb45de21c369079ef2d662670a4892c81573bcc143c4205cae98282ba97"},
+    {file = "orjson-3.10.11-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7dfa8db55c9792d53c5952900c6a919cfa377b4f4534c7a786484a6a4a350c19"},
+    {file = "orjson-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:51f3382415747e0dbda9dade6f1e1a01a9d37f630d8c9049a8ed0e385b7a90c0"},
+    {file = "orjson-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f35a1b9f50a219f470e0e497ca30b285c9f34948d3c8160d5ad3a755d9299433"},
+    {file = "orjson-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f3b7c5803138e67028dde33450e054c87e0703afbe730c105f1fcd873496d5"},
+    {file = "orjson-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f91d9eb554310472bd09f5347950b24442600594c2edc1421403d7610a0998fd"},
+    {file = "orjson-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dfbb2d460a855c9744bbc8e36f9c3a997c4b27d842f3d5559ed54326e6911f9b"},
+    {file = "orjson-3.10.11-cp312-none-win32.whl", hash = "sha256:d4a62c49c506d4d73f59514986cadebb7e8d186ad510c518f439176cf8d5359d"},
+    {file = "orjson-3.10.11-cp312-none-win_amd64.whl", hash = "sha256:f1eec3421a558ff7a9b010a6c7effcfa0ade65327a71bb9b02a1c3b77a247284"},
+    {file = "orjson-3.10.11-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c46294faa4e4d0eb73ab68f1a794d2cbf7bab33b1dda2ac2959ffb7c61591899"},
+    {file = "orjson-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52e5834d7d6e58a36846e059d00559cb9ed20410664f3ad156cd2cc239a11230"},
+    {file = "orjson-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2fc947e5350fdce548bfc94f434e8760d5cafa97fb9c495d2fef6757aa02ec0"},
+    {file = "orjson-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0efabbf839388a1dab5b72b5d3baedbd6039ac83f3b55736eb9934ea5494d258"},
+    {file = "orjson-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a3f29634260708c200c4fe148e42b4aae97d7b9fee417fbdd74f8cfc265f15b0"},
+    {file = "orjson-3.10.11-cp313-none-win32.whl", hash = "sha256:1a1222ffcee8a09476bbdd5d4f6f33d06d0d6642df2a3d78b7a195ca880d669b"},
+    {file = "orjson-3.10.11-cp313-none-win_amd64.whl", hash = "sha256:bc274ac261cc69260913b2d1610760e55d3c0801bb3457ba7b9004420b6b4270"},
+    {file = "orjson-3.10.11-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:19b3763e8bbf8ad797df6b6b5e0fc7c843ec2e2fc0621398534e0c6400098f87"},
+    {file = "orjson-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1be83a13312e5e58d633580c5eb8d0495ae61f180da2722f20562974188af205"},
+    {file = "orjson-3.10.11-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:afacfd1ab81f46dedd7f6001b6d4e8de23396e4884cd3c3436bd05defb1a6446"},
+    {file = "orjson-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cb4d0bea56bba596723d73f074c420aec3b2e5d7d30698bc56e6048066bd560c"},
+    {file = "orjson-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96ed1de70fcb15d5fed529a656df29f768187628727ee2788344e8a51e1c1350"},
+    {file = "orjson-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bfb30c891b530f3f80e801e3ad82ef150b964e5c38e1fb8482441c69c35c61c"},
+    {file = "orjson-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d496c74fc2b61341e3cefda7eec21b7854c5f672ee350bc55d9a4997a8a95204"},
+    {file = "orjson-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:655a493bac606655db9a47fe94d3d84fc7f3ad766d894197c94ccf0c5408e7d3"},
+    {file = "orjson-3.10.11-cp38-none-win32.whl", hash = "sha256:b9546b278c9fb5d45380f4809e11b4dd9844ca7aaf1134024503e134ed226161"},
+    {file = "orjson-3.10.11-cp38-none-win_amd64.whl", hash = "sha256:b592597fe551d518f42c5a2eb07422eb475aa8cfdc8c51e6da7054b836b26782"},
+    {file = "orjson-3.10.11-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c95f2ecafe709b4e5c733b5e2768ac569bed308623c85806c395d9cca00e08af"},
+    {file = "orjson-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80c00d4acded0c51c98754fe8218cb49cb854f0f7eb39ea4641b7f71732d2cb7"},
+    {file = "orjson-3.10.11-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:461311b693d3d0a060439aa669c74f3603264d4e7a08faa68c47ae5a863f352d"},
+    {file = "orjson-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52ca832f17d86a78cbab86cdc25f8c13756ebe182b6fc1a97d534051c18a08de"},
+    {file = "orjson-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c57ea78a753812f528178aa2f1c57da633754c91d2124cb28991dab4c79a54"},
+    {file = "orjson-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7fcfc6f7ca046383fb954ba528587e0f9336828b568282b27579c49f8e16aad"},
+    {file = "orjson-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:86b9dd983857970c29e4c71bb3e95ff085c07d3e83e7c46ebe959bac07ebd80b"},
+    {file = "orjson-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4d83f87582d223e54efb2242a79547611ba4ebae3af8bae1e80fa9a0af83bb7f"},
+    {file = "orjson-3.10.11-cp39-none-win32.whl", hash = "sha256:9fd0ad1c129bc9beb1154c2655f177620b5beaf9a11e0d10bac63ef3fce96950"},
+    {file = "orjson-3.10.11-cp39-none-win_amd64.whl", hash = "sha256:10f416b2a017c8bd17f325fb9dee1fb5cdd7a54e814284896b7c3f2763faa017"},
+    {file = "orjson-3.10.11.tar.gz", hash = "sha256:e35b6d730de6384d5b2dab5fd23f0d76fae8bbc8c353c2f78210aa5fa4beb3ef"},
 ]
 
 [package.source]
@@ -184,13 +191,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "packaging"
-version = "24.1"
+version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
-    {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
+    {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+    {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
 ]
 
 [package.source]
@@ -200,7 +207,7 @@ reference = "tsinghua"
 
 [[package]]
 name = "paicorelib"
-version = "1.3.0"
+version = "1.3.1"
 description = "Library of PAICORE 2.0"
 optional = false
 python-versions = "^3.9"
@@ -215,7 +222,7 @@ pydantic = "^2.0.3"
 type = "git"
 url = "https://github.com/PAICookers/PAIlib.git"
 reference = "dev"
-resolved_reference = "5cedc5fb1f66bc21e1c442a87bc804517a6555c2"
+resolved_reference = "36c76aca47dc6874195bbe65d29a706d207e2af3"
 
 [[package]]
 name = "pluggy"
@@ -239,18 +246,18 @@ reference = "tsinghua"
 
 [[package]]
 name = "pydantic"
-version = "2.8.2"
+version = "2.9.2"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8"},
-    {file = "pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a"},
+    {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"},
+    {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"},
 ]
 
 [package.dependencies]
-annotated-types = ">=0.4.0"
-pydantic-core = "2.20.1"
+annotated-types = ">=0.6.0"
+pydantic-core = "2.23.4"
 typing-extensions = [
     {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
     {version = ">=4.6.1", markers = "python_version < \"3.13\""},
@@ -258,6 +265,7 @@ typing-extensions = [
 
 [package.extras]
 email = ["email-validator (>=2.0.0)"]
+timezone = ["tzdata"]
 
 [package.source]
 type = "legacy"
@@ -266,100 +274,100 @@ reference = "tsinghua"
 
 [[package]]
 name = "pydantic-core"
-version = "2.20.1"
+version = "2.23.4"
 description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a"},
-    {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840"},
-    {file = "pydantic_core-2.20.1-cp310-none-win32.whl", hash = "sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250"},
-    {file = "pydantic_core-2.20.1-cp310-none-win_amd64.whl", hash = "sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27"},
-    {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b"},
-    {file = "pydantic_core-2.20.1-cp311-none-win32.whl", hash = "sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a"},
-    {file = "pydantic_core-2.20.1-cp311-none-win_amd64.whl", hash = "sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1"},
-    {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd"},
-    {file = "pydantic_core-2.20.1-cp312-none-win32.whl", hash = "sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688"},
-    {file = "pydantic_core-2.20.1-cp312-none-win_amd64.whl", hash = "sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203"},
-    {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0"},
-    {file = "pydantic_core-2.20.1-cp313-none-win32.whl", hash = "sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e"},
-    {file = "pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4745f4ac52cc6686390c40eaa01d48b18997cb130833154801a442323cc78f91"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a8ad4c766d3f33ba8fd692f9aa297c9058970530a32c728a2c4bfd2616d3358b"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41e81317dd6a0127cabce83c0c9c3fbecceae981c8391e6f1dec88a77c8a569a"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04024d270cf63f586ad41fff13fde4311c4fc13ea74676962c876d9577bcc78f"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaad4ff2de1c3823fddf82f41121bdf453d922e9a238642b1dedb33c4e4f98ad"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26ab812fa0c845df815e506be30337e2df27e88399b985d0bb4e3ecfe72df31c"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c5ebac750d9d5f2706654c638c041635c385596caf68f81342011ddfa1e5598"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2aafc5a503855ea5885559eae883978c9b6d8c8993d67766ee73d82e841300dd"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4868f6bd7c9d98904b748a2653031fc9c2f85b6237009d475b1008bfaeb0a5aa"},
-    {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa2f457b4af386254372dfa78a2eda2563680d982422641a85f271c859df1987"},
-    {file = "pydantic_core-2.20.1-cp38-none-win32.whl", hash = "sha256:225b67a1f6d602de0ce7f6c1c3ae89a4aa25d3de9be857999e9124f15dab486a"},
-    {file = "pydantic_core-2.20.1-cp38-none-win_amd64.whl", hash = "sha256:6b507132dcfc0dea440cce23ee2182c0ce7aba7054576efc65634f080dbe9434"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b03f7941783b4c4a26051846dea594628b38f6940a2fdc0df00b221aed39314c"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1eedfeb6089ed3fad42e81a67755846ad4dcc14d73698c120a82e4ccf0f1f9f6"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:635fee4e041ab9c479e31edda27fcf966ea9614fff1317e280d99eb3e5ab6fe2"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:77bf3ac639c1ff567ae3b47f8d4cc3dc20f9966a2a6dd2311dcc055d3d04fb8a"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ed1b0132f24beeec5a78b67d9388656d03e6a7c837394f99257e2d55b461611"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6514f963b023aeee506678a1cf821fe31159b925c4b76fe2afa94cc70b3222b"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d4204d8ca33146e761c79f83cc861df20e7ae9f6487ca290a97702daf56006"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d036c7187b9422ae5b262badb87a20a49eb6c5238b2004e96d4da1231badef1"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9ebfef07dbe1d93efb94b4700f2d278494e9162565a54f124c404a5656d7ff09"},
-    {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6b9d9bb600328a1ce523ab4f454859e9d439150abb0906c5a1983c146580ebab"},
-    {file = "pydantic_core-2.20.1-cp39-none-win32.whl", hash = "sha256:784c1214cb6dd1e3b15dd8b91b9a53852aed16671cc3fbe4786f4f1db07089e2"},
-    {file = "pydantic_core-2.20.1-cp39-none-win_amd64.whl", hash = "sha256:d2fe69c5434391727efa54b47a1e7986bb0186e72a41b203df8f5b0a19a4f669"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99"},
-    {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:407653af5617f0757261ae249d3fba09504d7a71ab36ac057c938572d1bc9331"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c693e916709c2465b02ca0ad7b387c4f8423d1db7b4649c551f27a529181c5ad"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b5ff4911aea936a47d9376fd3ab17e970cc543d1b68921886e7f64bd28308d1"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177f55a886d74f1808763976ac4efd29b7ed15c69f4d838bbd74d9d09cf6fa86"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:964faa8a861d2664f0c7ab0c181af0bea66098b1919439815ca8803ef136fc4e"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4dd484681c15e6b9a977c785a345d3e378d72678fd5f1f3c0509608da24f2ac0"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f6d6cff3538391e8486a431569b77921adfcdef14eb18fbf19b7c0a5294d4e6a"},
-    {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6d511cc297ff0883bc3708b465ff82d7560193169a8b93260f74ecb0a5e08a7"},
-    {file = "pydantic_core-2.20.1.tar.gz", hash = "sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"},
+    {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"},
+    {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"},
+    {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"},
+    {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"},
+    {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"},
+    {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"},
+    {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"},
+    {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"},
+    {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"},
+    {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"},
+    {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"},
+    {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"},
+    {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"},
 ]
 
 [package.dependencies]
@@ -372,13 +380,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "pytest"
-version = "8.2.2"
+version = "8.3.3"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"},
-    {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"},
+    {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"},
+    {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"},
 ]
 
 [package.dependencies]
@@ -386,7 +394,7 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""}
 exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
 iniconfig = "*"
 packaging = "*"
-pluggy = ">=1.5,<2.0"
+pluggy = ">=1.5,<2"
 tomli = {version = ">=1", markers = "python_version < \"3.11\""}
 
 [package.extras]
@@ -418,13 +426,13 @@ reference = "tsinghua"
 
 [[package]]
 name = "tomli"
-version = "2.0.1"
+version = "2.1.0"
 description = "A lil' TOML parser"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
-    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+    {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"},
+    {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"},
 ]
 
 [package.source]
@@ -451,4 +459,4 @@ reference = "tsinghua"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "ddc3c2f447aeb01635bbdaefe64e188ad015b016d082d956a06e1d73b39dd132"
+content-hash = "25cfd043004050d36aa3b7c7bede80ae551fda620af4c9d8600ffde29b8f8c61"
diff --git a/pyproject.toml b/pyproject.toml
index 75cc1f5b..3788a3d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ include = ["docs/Guide-of-PAIBox.md", "CHANGELOG.md"]
 python = "^3.9"
 pydantic = "^2.0.3"
 numpy = "^1.26.0"
-paicorelib = "~1.3"
+paicorelib = ">=1.3.1"
 
 [tool.poetry.group.test]
 optional = true

From d8b7f5dca8cd6b7f9f83420be16efaf45628e8b0 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 20 Nov 2024 16:07:42 +0800
Subject: [PATCH 134/187] =?UTF-8?q?=F0=9F=93=9D=20update=20docs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/Guide-of-PAIBox.md | 92 ++++++++++++++++++++---------------------
 docs/Guide-of-Test.md   |  2 +-
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/docs/Guide-of-PAIBox.md b/docs/Guide-of-PAIBox.md
index 94bd1039..9c648cce 100644
--- a/docs/Guide-of-PAIBox.md
+++ b/docs/Guide-of-PAIBox.md
@@ -1,18 +1,5 @@
-<style>
-.center
-{
-    width: auto;
-    display: table;
-    margin-left: auto;
-    margin-right: auto;
-}
-</style>
-
-<div align="center">
 # PAIBox使用指南
 
-</div>
-
 ## 安装
 
 ```toml
@@ -60,12 +47,12 @@ PAIBox 提供了多种类型的神经元模型，能够实现各种特殊的功
 
 #### IF
 
-IF 神经元实现了经典的“积分发射”模型，其调用方式及参数如下：
+IF 神经元实现了经典的“积分-发射”模型，其调用方式及参数如下：
 
 ```python
 import paibox as pb
 
-n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=False, delay=1, tick_wait_start=1, tick_wait_end=0, name='n1')
+n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=True, delay=1, tick_wait_start=1, tick_wait_end=0, name='n1')
 ```
 
 其中：
@@ -84,9 +71,6 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 
 神经元的部分行为由芯片计算核的某些配置项决定：输入数据位数、输出数据位数、SNN使能。芯片计算核的工作模式即由这些参数决定。例如，SNN模式则是输入数据、输出数据位数均为1bit，SNN使能为1。对应关系如下表所列：
 
-<p align="center">计算核配置项与工作模式对应表</p>
-<div class="center">
-
 |            模式             | `input_width` | `spike_width` | `snn_en` |
 | :-------------------------: | :-----------: | :-----------: | :------: |
 |            BANN             |       0       |       0       |    0     |
@@ -97,16 +81,11 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 |            BANN             |       1       |       1       |    0     |
 |          Undefined          |       1       |      0/1      |    1     |
 
-</div>
-
 - `input_width`：处理核输入数据位数，1或8。为1表示该处理核的输入数据为脉冲，反之为 8bit 无符号数。默认为1。
 - `spike_width`：神经元输出数据位数，1或8。为1表示该处理核输出数据（从神经元输出）为脉冲，反之为 8bit 无符号数。默认为1。
 - `snn_en`：SNN 模式使能。当开启时，神经元内的计算保留上一时刻膜电平信息，反之不保留（ANN 计算模式不需要上一时刻膜电平信息）。默认为 `True`。
 - `bit_truncation`：神经元输出的 8bit 无符号数的截断位置。默认为8，该参数仅在 `spike_width=8` 时生效。由于膜电平为 30bit 有符号数，因此需要截取 8bit 作为神经元最终的输出。若膜电平最高有效位大于所截取的位置，则输出255。该截断操作类似于有上限的斜率可调的 Relu 操作。`bit_truncation` 与截取位置的对应关系如下表所列：
 
-<p align="center">截取位置对应表</p>
-<div class="center">
-
 | `bit_truncation` |   截取位置    |
 | :--------------: | :-----------: |
 |        0         |     8'h0      |
@@ -118,15 +97,13 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 |        ……        |      ……       |
 |        29        |    [28:21]    |
 
-</div>
-
 #### LIF
 
 LIF 神经元实现了“泄露-积分-发射”神经元模型，其调用方式及参数如下：
 
 ```python
-n1 = pb.LIF(shape=128, threshold=127, reset_v=0, leak_v=-1, neg_threshold=0, keep_shape=False, name='n1')
-n2 = pb.LIF(shape=128, threshold=10, reset_v=1, bias=-1, keep_shape=True, name='n2')
+n1 = pb.LIF(shape=128, threshold=127, reset_v=0, leak_v=-1, neg_threshold=0, name='n1')
+n2 = pb.LIF(shape=128, threshold=10, reset_v=1, bias=-1, name='n2')
 ```
 
 - `leak_v`：泄露，有符号数。
@@ -138,7 +115,7 @@ n2 = pb.LIF(shape=128, threshold=10, reset_v=1, bias=-1, keep_shape=True, name='
 Tonic Spiking 神经元可以实现对持续脉冲刺激的周期性反应。
 
 ```python
-n1 = pb.TonicSpiking(shape=128, fire_step=3, keep_shape=False, name='n1')
+n1 = pb.TonicSpiking(shape=128, fire_step=3, name='n1')
 ```
 
 - `fire_step`：发放时间，每接收到 `N` 次刺激后发放脉冲。
@@ -148,16 +125,44 @@ n1 = pb.TonicSpiking(shape=128, fire_step=3, keep_shape=False, name='n1')
 Phasic Spiking 神经元可以实现，在接受一定数量脉冲后发放，然后保持静息状态，不再发放。
 
 ```python
-n1 = pb.PhasicSpiking(shape=128, fire_step=3, neg_floor=-10, keep_shape=False, name='n1')
+n1 = pb.PhasicSpiking(shape=128, fire_step=3, neg_floor=-10, name='n1')
 ```
 
 - `fire_step`：发放时间，每接收到 `N` 次刺激后发放脉冲。
 - `neg_floor`：地板阈值，有符号负数。当发放脉冲后，膜电位将永远保持在地板阈值。
 
+#### Bypass Neuron
+
+正阈值为1，负阈值、复位电平、泄露均为0的神经元。它的输出等于输入。
+
+```python
+n1 = pb.BypassNeuron(shape=128, name='n1')
+```
+
 #### Spiking Relu
 
+⚠️ 即将弃用，请使用 `BypassNeuron`
+
 SNN 模式下，具有 Relu 功能的神经元。当输入为1，则输出为1；输入为非正整数，输出为0。
 
+#### ANN Neuron
+
+`LIF` 的子类，在 ANN 模式下调用。默认 `bit_truncation=8`（可通过参数 `bit_trunc` 修改），且预设 `input_width=8`，`spike_width=8` 以及 `snn_en=False`。
+
+```python
+n1 = pb.ANNNeuron(shape=128, bias=1, bit_trunc=9, name='n1')
+```
+
+其中，`bias` 的含义参见前述；`bit_trunc` 即前述的 `bit_truncation`（神经元输出的截断位置）。
+
+#### ANN Bypass Neuron
+
+`ANNNeuron` 的子类，在 ANN 模式下调用，可作为直通神经元使用。`bias=0`，`bit_truncation=8`。
+
+```python
+n1 = pb.ANNBypassNeuron(shape=128, name='n1')
+```
+
 ### 突触
 
 #### 全连接 FullConn
@@ -618,9 +623,9 @@ print(output)
 
 功能模块均支持 `delay`，`tick_wait_start`，`tick_wait_end`，`keep_shape` 参数。
 
-### 逻辑运算
+### 逻辑位运算
 
-逻辑运算模块实现了 `numpy` 中的位逻辑运算操作（例如 `&` 与 `numpy.bitwise_and` 等），可对接收到的一或多个输出脉冲进行逻辑运算，并产生脉冲输出。PAIBox 提供了逻辑与、或、非、异或：`BitwiseAND`，`BitwiseOR`，`BitwiseNOT`，`BitwiseXOR`。以位与为例：
+逻辑位运算模块实现了 `numpy` 中的位逻辑运算操作（例如 `&` 与 `numpy.bitwise_and` 等），可对接收到的一或多个输出脉冲进行逻辑运算，并产生脉冲输出。PAIBox 提供了位与、或、非、异或：`BitwiseAND`，`BitwiseOR`，`BitwiseNOT`，`BitwiseXOR`。以位与为例：
 
 ```python
 import paibox as pb
@@ -647,21 +652,6 @@ class Net(pb.DynSysGroup):
 
 ⚠️ 模块的属性 `external_delay` 用于表示其相对于外部的内部固有延迟。这是由具体的后端构建形式决定的，不可更改。上述示例中，位与计算结果将输出至 `n3` 中。默认情况下，`n3` 将在位与计算结果输出后启动，因此其启动时间为 `and1` 的启动时间+固有延迟+1。
 
-### 延迟链
-
-用于实现神经元延迟输出。使用方式如下：
-
-```python
-n1 = pb.IF((10,), 1, 0, delay=1, tick_wait_start=1)
-n1_delay_out = pb.DelayChain(n1, chain_level=5, delay=1, tick_wait_start=2)
-n2 = pb.SpikingRelu((10,), delay=1, tick_wait_start=n1_delay_out.tick_wait_start + n1_delay_out.external_delay)
-```
-
-其中：
-
-- `neuron`：进行延迟输出的神经元。
-- `chain_level`：延迟链的级数，即延迟的时间步。注意，这与 `delay` 含义不同：延迟链内部会建立多级神经元（类似buffer），以实现数据的延迟传递，而 `delay` 会使得神经元输出寄存的位置延后，后继节点的启动时间需要提前，这将导致其在前级**有效输出**前就进行了计算。
-
 ### 2D平均/最大池化
 
 目前仅提供2D池化：`SpikingAvgPool2d`、`SpikingMaxPool2d`。以最大池化为例：
@@ -687,10 +677,18 @@ s3 = pb.FullConn(p2d, n2, conn_type=pb.SynConnType.One2One)
 
 - `threshold`：平均池化的比较阈值，芯片需要通过神经元的阈值比较间接地实现除法。当不指定时，阈值为 $\text{round}(\text{kernel\_size}/2)$。池化窗口的输入做累加后与该阈值进行比较，可等价于平均池化的操作，即 $o_j=\sum^{k-1}_{i=0}x_{ij} \ge V_{th,pos}$，其中 $k$ 为池化窗口尺寸，$x_{ij}$ 为每个池化窗口内的输入特征图元素，$o_j$ 为第 $j$ 个输出特征图元素。
 
-### \*2D平均池化（与膜电位相关）
+### 2D平均池化（膜电位相关）
 
 这是 `SpikingAvgPool2d` 的另一种实现形式。`SpikingAvgPool2d` 在每个时间步上的运算**不会造成膜电位积累**（当未发放时），因此，可以说它与时间步无关。而该平均池化实现，当未发放时，**会造成膜电位积累**，因此与时间步相关。调用 `SpikingAvgPool2dWithV`，参数与前述 `SpikingAvgPool2d` 相同。
 
+### 1D平均/最大池化
+
+请参阅2D平均/最大池化。
+
+### 1D平均池化（膜电位相关）
+
+请参阅2D平均池化（膜电位相关）。
+
 ### 脉冲加、减
 
 脉冲加减法与数的加减法存在差异。对脉冲进行加减，运算结果将在较长时间步上体现。例如，在 `T=1` 时刻两神经元均输出1，则将在 `T=2,3` 时刻产生输出脉冲。以下为脉冲加减法运算示例。其中，输入为 `T=12` 脉冲序列，输出为 `T=20` 脉冲序列。
diff --git a/docs/Guide-of-Test.md b/docs/Guide-of-Test.md
index 8146b0b4..5263ea9d 100644
--- a/docs/Guide-of-Test.md
+++ b/docs/Guide-of-Test.md
@@ -21,7 +21,7 @@ pytest = "^8.0.0"
 
 ## 常用测试夹具
 
-几个常用的与测试环境相关的夹具介绍。请将这些夹具加入到**测试项目所在目录**的 `conftest.py` 内。
+几个常用的与测试环境相关的夹具介绍。可直接在 `tests` 目录下的测试项目中使用这些夹具。
 
 1. 指定测试项目的文件输出目录，例如，输出调试日志等信息。该夹具确保创建一个目录，并返回。若目录已存在，则清空目录（可选）。
 

From 46dfe06a85ac9968553f4811b04c6d254988e6ab Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 20 Nov 2024 16:11:23 +0800
Subject: [PATCH 135/187] =?UTF-8?q?=F0=9F=94=96=20v1.2.0a2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md   | 8 +++++++-
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8274561c..5473a4ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -70,9 +70,15 @@
 
 ## v1.1.1
 
-- 修复对权重RAM错误的配置
+- 修复对权重 RAM 错误的配置
 
 ## v1.2.0a1
 
 - 提高 `paicorelib` 依赖版本至 `~1.3`
 - 支持 ANN 网络的构建与部署
+
+## v1.2.0a2
+
+- 提高 `paicorelib` 依赖版本至 `>=1.3.1`
+- 支持1D脉冲平均/最大池化算子
+- 重构路由算法，支持嵌套路由
diff --git a/pyproject.toml b/pyproject.toml
index 3788a3d3..2a0bd1bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "paibox"
-version = "1.2.0a1"
+version = "1.2.0a2"
 description = "Toolchain of PAICORE 2.0"
 authors = ["Ziru Pan <zrpan@stu.pku.edu.cn>"]
 maintainers = [

From 1c441eea6627ed264781c5d8f7d96310cd205384 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 21 Nov 2024 15:31:59 +0800
Subject: [PATCH 136/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(synapses)?=
 =?UTF-8?q?:=20rename=20`MaxPool2dSemiFoldedSyn`=20to=20`MaxPoolSyn`,=20th?=
 =?UTF-8?q?e=20weight=20of=20max=20pooling=20syn=20must=20be=20a=20mask?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/functional.py              | 26 ++++++++++----------
 paibox/components/synapses/__init__.py       |  7 +-----
 paibox/components/synapses/base.py           |  8 +++---
 paibox/components/synapses/transforms.py     | 25 +++++++++++++------
 tests/components/synapses/test_transforms.py |  6 ++---
 5 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 9438d839..5abcd7f1 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -37,7 +37,7 @@
 from .neuron.neurons import *
 from .neuron.utils import vjt_overflow
 from .projection import InputProj
-from .synapses import ConnType, Conv2dSemiFoldedSyn, FullConnSyn, MaxPool2dSemiFoldedSyn
+from .synapses import ConnType, Conv2dSemiFoldedSyn, FullConnSyn, MaxPoolSyn
 from .synapses.conv_types import _Size1Type, _Size2Type
 from .synapses.conv_utils import _pair
 
@@ -941,7 +941,7 @@ def build(
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
                 neuron,
-                weights=_delay_mapping(in_h, in_ch),
+                weights=_delay_mapping_mask(in_h, in_ch),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1089,7 +1089,7 @@ def build(
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
                 neuron,
-                weights=_delay_mapping(in_h, cin),
+                weights=_delay_mapping_mask(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1124,7 +1124,7 @@ def build(
                 syn1 = FullConnSyn(
                     self.module_intf.operands[0],
                     neuron,
-                    weights=_delay_mapping(in_h, cin),
+                    weights=_delay_mapping_mask(in_h, cin),
                     conn_type=ConnType.All2All,
                     name=f"s{p}_pad_{self.name}",
                 )
@@ -1255,15 +1255,15 @@ def build(
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
                 neuron,
-                weights=_delay_mapping(in_h, cin),
+                weights=_delay_mapping_mask(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
-            syn2 = MaxPool2dSemiFoldedSyn(
+            syn2 = MaxPoolSyn(
                 neuron,
                 pool2d,
-                weights=_poo2d_semifolded_mapping(
+                weights=_poo2d_semifolded_mapping_mask(
                     cin,
                     in_h,
                     self.shape_out[1],
@@ -1395,7 +1395,7 @@ def build(
             syn1 = FullConnSyn(
                 self.module_intf.operands[0],
                 neuron,
-                weights=_delay_mapping(in_h, cin),
+                weights=_delay_mapping_mask(in_h, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1403,7 +1403,7 @@ def build(
             syn2 = FullConnSyn(
                 neuron,
                 pool2d,
-                weights=_poo2d_semifolded_mapping(
+                weights=_poo2d_semifolded_mapping_mask(
                     cin, in_h, out_h, kh, self.stride, self.padding
                 ),
                 conn_type=ConnType.All2All,
@@ -1427,7 +1427,7 @@ def build(
                 syn1 = FullConnSyn(
                     self.module_intf.operands[0],
                     neuron,
-                    weights=_delay_mapping(in_h, cin),
+                    weights=_delay_mapping_mask(in_h, cin),
                     conn_type=ConnType.All2All,
                     name=f"s{p}_pad_{self.name}",
                 )
@@ -1436,7 +1436,7 @@ def build(
                 syn2 = FullConnSyn(
                     neuron,
                     pool2d,
-                    weights=-_poo2d_semifolded_mapping(
+                    weights=-_poo2d_semifolded_mapping_mask(
                         cin, in_h, out_h, kh, self.stride, self.padding
                     ),
                     conn_type=ConnType.All2All,
@@ -1556,11 +1556,11 @@ def _transpose3d_mapping(
     return mt
 
 
-def _delay_mapping(h: int, cin: int) -> WeightType:
+def _delay_mapping_mask(h: int, cin: int) -> WeightType:
     return np.eye(cin * h, dtype=WEIGHT_DTYPE)
 
 
-def _poo2d_semifolded_mapping(
+def _poo2d_semifolded_mapping_mask(
     cin: int,
     ih: int,
     oh: int,
diff --git a/paibox/components/synapses/__init__.py b/paibox/components/synapses/__init__.py
index 7459cce0..bb75d14e 100644
--- a/paibox/components/synapses/__init__.py
+++ b/paibox/components/synapses/__init__.py
@@ -1,7 +1,2 @@
-from .base import (
-    Conv2dSemiFoldedSyn,
-    FullConnectedSyn,
-    FullConnSyn,
-    MaxPool2dSemiFoldedSyn,
-)
+from .base import Conv2dSemiFoldedSyn, FullConnectedSyn, FullConnSyn, MaxPoolSyn
 from .transforms import ConnType
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index a444023a..5ef6da87 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -25,7 +25,7 @@
     MaskedLinear,
     OneToOne,
     Transform,
-    _CompareMax,
+    CompareMax,
 )
 
 RIGISTER_MASTER_KEY_FORMAT = "{0}.output"
@@ -488,7 +488,9 @@ def __init__(
         )
 
 
-class MaxPool2dSemiFoldedSyn(FullConnectedSyn):
+class MaxPoolSyn(FullConnectedSyn):
+    """Max pooling synapses. Only used when input width is 8-bit."""
+
     def __init__(
         self,
         source: Union[NeuDyn, InputProj],
@@ -497,4 +499,4 @@ def __init__(
         name: Optional[str] = None,
     ) -> None:
         super().__init__(source, dest, name)
-        self.comm = _CompareMax((self.num_in, self.num_out), weights)
+        self.comm = CompareMax((self.num_in, self.num_out), weights)
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index df9c694a..d8448fef 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -44,6 +44,7 @@
     "Conv2dSemiFoldedForward",
     "ConvTranspose1dForward",
     "ConvTranspose2dForward",
+    "CompareMax",
 ]
 
 
@@ -583,7 +584,18 @@ def connectivity(self):
         )
 
 
-class _CompareMax(AllToAll):
+class CompareMax(AllToAll):
+    def __init__(self, conn_size: Size2Type, mask: DataType) -> None:
+        """A transformation that finds the maximum of the input vector according to each column of the  \
+            mask matrix.
+
+        NOTE: the value of mask matrix must be either 0 or 1.
+        """
+        if not np.all((mask == 0) | (mask == 1)):
+            raise ValueError("the mask must be 0 or 1.")
+
+        super().__init__(conn_size, mask)
+
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         """The maximum value of the input corresponding to the non-zero columns of the weight matrix is \
             taken as the output.
@@ -592,13 +604,12 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
             y = (y1, y2, ..., ym)
         """
         if self.weights.ndim == 0:
-            output = self.weights * np.full(
-                (self.conn_size[1],), np.max(x, axis=None), dtype=VOLTAGE_DTYPE
+            output = np.full(
+                (self.conn_size[1],),
+                self.weights * np.max(x, axis=None),
+                dtype=VOLTAGE_DTYPE,
             )
         else:
-            output = np.zeros((self.conn_size[1],), dtype=VOLTAGE_DTYPE)
-            for col in range(self.conn_size[1]):
-                col_result = x * self.weights[:, col].astype(VOLTAGE_DTYPE)
-                output[col] = np.max(col_result)
+            output = np.max(x[:, None] * self.weights, axis=0).astype(VOLTAGE_DTYPE)
 
         return output
diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index 6eecdb4b..2171cf79 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -707,18 +707,16 @@ def test_ConvTranspose2dForward(
 
     @pytest.mark.parametrize("n_compare, n_group", [(4, 8), (9, 12), (25, 1)])
     def test_CompareMax(self, n_compare, n_group):
-        from paibox.components.synapses.transforms import _CompareMax
-
         n = n_compare * n_group
         w = np.zeros((n, n_group), dtype=np.int8)
         for i in range(n_group):
             w[n_compare * i : n_compare * (i + 1), i] = 1
 
-        f = _CompareMax((n, n_group), w)
+        f = tfm.CompareMax((n, n_group), w)
 
         x = np.random.randint(0, 256, size=(n_compare, n_group), dtype=np.uint8)
         y1 = f(x.ravel(order="F"))  # flatten in column-major order
-        expected = np.zeros((n_group,), dtype=np.int32)
+        expected = np.zeros((n_group,), dtype=np.uint8)
 
         for i in range(n_group):
             expected[i] = np.max(x[:, i])

From 9ea7301f4d14b0f19509f9503f77fc68d24ad1d8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 21 Nov 2024 07:32:47 +0000
Subject: [PATCH 137/187] :rotating_light: auto fix by pre-commit hooks

---
 docs/Support-Ops.md                | 74 +++++++++++++++---------------
 paibox/components/synapses/base.py |  2 +-
 2 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/docs/Support-Ops.md b/docs/Support-Ops.md
index c1b16d62..29a06bd8 100644
--- a/docs/Support-Ops.md
+++ b/docs/Support-Ops.md
@@ -6,21 +6,21 @@
 
 芯片所支持的神经元配置项如下表所列：
 
-|         支持功能         | 可写 |         取值         | 功能描述                                                                                                                                                                         |
-| :-----------------------: | :--: | :------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-|         复位模式         |  ✅  | 硬复位/软复位/不复位 | 硬复位，膜电平重置为正/负阈值<br />软复位，膜电平将减正阈值/加负阈值（若负阈值模式为复位模式）<br />不复位，膜电平保持不变                                                       |
-|         复位电平         |  ✅  |    30比特有符号数    | 可配置复位电平                                                                                                                                                                   |
-|       比较前后泄露       |  ✅  |        前/后        | 阈值比较发生在泄露前/后                                                                                                                                                          |
-|          正阈值          |  ✅  |    29比特无符号数    | 可配置正阈值                                                                                                                                                                     |
-|          负阈值          |  ✅  |    29比特无符号数    | 可配置负阈值                                                                                                                                                                     |
-|         泄露电平         |  ✅  |    30比特有符号数    | 可配置泄露幅值                                                                                                                                                                   |
-|         反向泄露         |  ✅  |      开启/关闭      | 若开启，泄露与当前膜电平符号相关：<br />当泄露值为正，膜电平向0收敛<br />当泄露值为负，膜电平偏离0发散                                                                           |
-|        负阈值模式        |  ✅  |      复位/饱和      | 当膜电平低于负阈值时：<br />为复位模式，根据复位模式复位<br />为饱和模式，膜电平重置为负阈值                                                                                     |
+|         支持功能          | 可写 |         取值         | 功能描述                                                                                                                                                                        |
+| :-----------------------: | :--: | :------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+|         复位模式          |  ✅  | 硬复位/软复位/不复位 | 硬复位，膜电平重置为正/负阈值<br />软复位，膜电平将减正阈值/加负阈值（若负阈值模式为复位模式）<br />不复位，膜电平保持不变                                                      |
+|         复位电平          |  ✅  |    30比特有符号数    | 可配置复位电平                                                                                                                                                                  |
+|       比较前后泄露        |  ✅  |        前/后         | 阈值比较发生在泄露前/后                                                                                                                                                         |
+|          正阈值           |  ✅  |    29比特无符号数    | 可配置正阈值                                                                                                                                                                    |
+|          负阈值           |  ✅  |    29比特无符号数    | 可配置负阈值                                                                                                                                                                    |
+|         泄露电平          |  ✅  |    30比特有符号数    | 可配置泄露幅值                                                                                                                                                                  |
+|         反向泄露          |  ✅  |      开启/关闭       | 若开启，泄露与当前膜电平符号相关：<br />当泄露值为正，膜电平向0收敛<br />当泄露值为负，膜电平偏离0发散                                                                          |
+|        负阈值模式         |  ✅  |      复位/饱和       | 当膜电平低于负阈值时：<br />为复位模式，根据复位模式复位<br />为饱和模式，膜电平重置为负阈值                                                                                    |
 | 膜电平截取位（仅ANN模式） |  ✅  |        [0,29]        | 输出膜电平的截取位置T，30比特有符号膜电平需截取8比特作为输出：<br />T<8，截取[T-1:0]，低位补0 <br />T=8，截取[7:0]<br />T≤29，截取[T-1:T-8]<br />膜电平大于窗口最高位则截断处理 |
-|       随机轴突整合       |  ✅  |      开启/关闭      | 若开启，神经元根据硬件生成的随机数\*过滤一些轴突上的输入，进行选择性累加                                                                                                         |
-|         随机泄露         |  ✅  |      开启/关闭      | 若开启，如果泄露幅值小于硬件生成的随机数\*，则此次泄露为0                                                                                                                        |
-|         阈值掩码         |  ✅  |        [0,29]        | 若开启，硬件生成的随机数\*将和它求与后得到一个0\~29比特随机阈值，并加至神经元的正、负阈值上                                                                                      |
-|          膜电平          |  ❌  |          0          | 只读寄存器，初始值为0                                                                                                                                                            |
+|       随机轴突整合        |  ✅  |      开启/关闭       | 若开启，神经元根据硬件生成的随机数\*过滤一些轴突上的输入，进行选择性累加                                                                                                        |
+|         随机泄露          |  ✅  |      开启/关闭       | 若开启，如果泄露幅值小于硬件生成的随机数\*，则此次泄露为0                                                                                                                       |
+|         阈值掩码          |  ✅  |        [0,29]        | 若开启，硬件生成的随机数\*将和它求与后得到一个0\~29比特随机阈值，并加至神经元的正、负阈值上                                                                                     |
+|          膜电平           |  ❌  |          0           | 只读寄存器，初始值为0                                                                                                                                                           |
 
 \*硬件生成的随机数均为无符号数。
 
@@ -34,26 +34,26 @@
 
 |         算子类型         | ANN | SNN |     备注     |
 | :----------------------: | :-: | :-: | :----------: |
-|          全连接          | ✅ | ✅ |              |
-|        2D矩阵乘法        | ✅ | ✅ |              |
-|          1D卷积          | ✅ | ✅ |  全展开形式  |
-|          2D卷积          | ✅ | ✅ |  全展开形式  |
-|        1D转置卷积        | ✅ | ✅ |  全展开形式  |
-|        2D转置卷积        | ✅ | ✅ |  全展开形式  |
-|           位与           | ❌ | ✅ |              |
-|           位或           | ❌ | ✅ |              |
-|           位非           | ❌ | ✅ |              |
-|          位异或          | ❌ | ✅ |              |
-|        1D平均池化        | ❌ | ✅ |    脉冲化    |
-| 1D平均池化（膜电位相关） | ❌ | ✅ |    脉冲化    |
-|        1D最大池化        | ❌ | ✅ |    脉冲化    |
-|        2D平均池化        | ❌ | ✅ |    脉冲化    |
-| 2D平均池化（膜电位相关） | ❌ | ✅ |    脉冲化    |
-|        2D最大池化        | ❌ | ✅ |    脉冲化    |
-|          脉冲加          | ❌ | ✅ | 针对脉冲序列 |
-|          脉冲减          | ❌ | ✅ | 针对脉冲序列 |
-|          线性层          | ✅ | ❌ |              |
-|          2D卷积          | ✅ | ❌ |  半折叠形式  |
-|        2D最大池化        | ✅ | ❌ |  半折叠形式  |
-|        2D平均池化        | ✅ | ❌ |  半折叠形式  |
-|          线性层          | ✅ | ❌ |  半折叠形式  |
+|          全连接          | ✅  | ✅  |              |
+|        2D矩阵乘法        | ✅  | ✅  |              |
+|          1D卷积          | ✅  | ✅  |  全展开形式  |
+|          2D卷积          | ✅  | ✅  |  全展开形式  |
+|        1D转置卷积        | ✅  | ✅  |  全展开形式  |
+|        2D转置卷积        | ✅  | ✅  |  全展开形式  |
+|           位与           | ❌  | ✅  |              |
+|           位或           | ❌  | ✅  |              |
+|           位非           | ❌  | ✅  |              |
+|          位异或          | ❌  | ✅  |              |
+|        1D平均池化        | ❌  | ✅  |    脉冲化    |
+| 1D平均池化（膜电位相关） | ❌  | ✅  |    脉冲化    |
+|        1D最大池化        | ❌  | ✅  |    脉冲化    |
+|        2D平均池化        | ❌  | ✅  |    脉冲化    |
+| 2D平均池化（膜电位相关） | ❌  | ✅  |    脉冲化    |
+|        2D最大池化        | ❌  | ✅  |    脉冲化    |
+|          脉冲加          | ❌  | ✅  | 针对脉冲序列 |
+|          脉冲减          | ❌  | ✅  | 针对脉冲序列 |
+|          线性层          | ✅  | ❌  |              |
+|          2D卷积          | ✅  | ❌  |  半折叠形式  |
+|        2D最大池化        | ✅  | ❌  |  半折叠形式  |
+|        2D平均池化        | ✅  | ❌  |  半折叠形式  |
+|          线性层          | ✅  | ❌  |  半折叠形式  |
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 5ef6da87..01fd912f 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -15,6 +15,7 @@
 from .conv_utils import _fm_ndim1_check, _fm_ndim2_check
 from .transforms import (
     AllToAll,
+    CompareMax,
     ConnType,
     Conv1dForward,
     Conv2dForward,
@@ -25,7 +26,6 @@
     MaskedLinear,
     OneToOne,
     Transform,
-    CompareMax,
 )
 
 RIGISTER_MASTER_KEY_FORMAT = "{0}.output"

From a49c074106017209825dc31dcfd7e2c3496d65f3 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 26 Nov 2024 15:30:59 +0800
Subject: [PATCH 138/187] =?UTF-8?q?=F0=9F=94=A8=20use=20`.source`=20to=20g?=
 =?UTF-8?q?et=20the=20input=20info=20of=20modules?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py   | 10 +++----
 paibox/components/functional.py | 47 ++++++++++++++++++---------------
 paibox/components/modules.py    | 23 +++++++---------
 3 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 4ec40ed1..a2059b95 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -125,7 +125,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         n_delaychain.append(n_out)  # Must append to the last.
 
         syn_in = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n_delaychain[0],
             1,
             conn_type=ConnType.One2One,
@@ -312,7 +312,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
             )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_p1d,
             weights=self.tfm.connectivity.astype(np.bool_),
             conn_type=ConnType.All2All,
@@ -391,7 +391,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_p1d,
             weights=self.tfm.connectivity.astype(np.bool_),
             conn_type=ConnType.All2All,
@@ -482,7 +482,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
             )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_p2d,
             weights=self.tfm.connectivity.astype(np.bool_),
             conn_type=ConnType.All2All,
@@ -566,7 +566,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_p2d,
             weights=self.tfm.connectivity.astype(np.bool_),
             conn_type=ConnType.All2All,
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 5abcd7f1..e39cbd0e 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -123,14 +123,14 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_and,
             1,
             conn_type=ConnType.One2One,
             name=f"s0_{self.name}",
         )
         syn2 = FullConnSyn(
-            self.module_intf.operands[1],
+            self.source[1],
             n1_and,
             1,
             conn_type=ConnType.One2One,
@@ -193,7 +193,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_not,
             weights=-1,
             conn_type=ConnType.One2One,
@@ -244,14 +244,14 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_or,
             1,
             conn_type=ConnType.One2One,
             name=f"s0_{self.name}",
         )
         syn2 = FullConnSyn(
-            self.module_intf.operands[1],
+            self.source[1],
             n1_or,
             1,
             conn_type=ConnType.One2One,
@@ -308,7 +308,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         identity = np.identity(self.num_out, dtype=np.int8)
         # weight of syn1, (-1*(N,), 1*(N,))
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_aux,
             weights=np.hstack([-1 * identity, identity], casting="safe", dtype=np.int8),
             conn_type=ConnType.All2All,
@@ -316,7 +316,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
         # weight of syn2, (1*(N,), -1*(N,))
         syn2 = FullConnSyn(
-            self.module_intf.operands[1],
+            self.source[1],
             n1_aux,
             weights=np.hstack([identity, -1 * identity], casting="safe", dtype=np.int8),
             conn_type=ConnType.All2All,
@@ -415,14 +415,14 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_sadd,
             self.factor_a,
             conn_type=ConnType.One2One,
             name=f"s0_{self.name}",
         )
         syn2 = FullConnSyn(
-            self.module_intf.operands[1],
+            self.source[1],
             n1_sadd,
             self.factor_b,
             conn_type=ConnType.One2One,
@@ -712,14 +712,14 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_ssub,
             self.factor_a,
             conn_type=ConnType.One2One,
             name=f"s0_{self.name}",
         )
         syn2 = FullConnSyn(
-            self.module_intf.operands[1],
+            self.source[1],
             n1_ssub,
             self.factor_b,
             conn_type=ConnType.One2One,
@@ -781,7 +781,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_t2d,
             weights=_transpose2d_mapping(self.shape_in),
             conn_type=ConnType.All2All,
@@ -848,7 +848,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
         )
 
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             n1_t3d,
             weights=_transpose3d_mapping(self.shape_in, self.axes),
             conn_type=ConnType.All2All,
@@ -886,7 +886,7 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
             name=f"nd_{self.name}",
         )
         syn1 = FullConnSyn(
-            self.module_intf.operands[0],
+            self.source[0],
             neuron_d,
             weights=self.weights,
             conn_type=ConnType.All2All,
@@ -974,6 +974,7 @@ def __init__(
         padding: _Size2Type = 0,
         bias: DataType = 0,
         bit_trunc: int = 8,
+        *,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
@@ -981,7 +982,7 @@ def __init__(
         """2d semi-folded convolution for ANN mode.
 
         Args:
-            neuron_s: source neuron. The dimensions need to be expressed explicitly as (C,H,W).
+            neuron_s: source neuron. The dimensions need to be expressed explicitly as (C,H) or (C,W).
             kernel: convolution kernel in (O,I,H,W) order.
             stride: the step size of the kernel sliding. It can be a scalar or a tuple of 2 integers.
             padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2 integers.
@@ -1036,8 +1037,8 @@ def build(
         ts_first_valid_inp: int,
         **build_options,
     ) -> BuiltComponentType:
-        assert len(self.module_intf.operands[0].shape_out) == 2
-        # if len(self.module_intf.operands[0].shape_out) != 2:
+        assert len(self.source[0].shape_out) == 2
+        # if len(self.source[0].shape_out) != 2:
         #     in_ch, in_h, in_w = _fm_ndim2_check(
         #         self.module_intf.operands[0].shape_out, "CHW"
         #     )
@@ -1087,7 +1088,7 @@ def build(
             n_delays.append(neuron)
             # delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],
+                self.source[0],
                 neuron,
                 weights=_delay_mapping_mask(in_h, cin),
                 conn_type=ConnType.All2All,
@@ -1122,7 +1123,7 @@ def build(
                 n_neg_padding.append(neuron)
                 # delay synapses
                 syn1 = FullConnSyn(
-                    self.module_intf.operands[0],
+                    self.source[0],
                     neuron,
                     weights=_delay_mapping_mask(in_h, cin),
                     conn_type=ConnType.All2All,
@@ -1162,6 +1163,7 @@ def __init__(
         neuron_s: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
+        *,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
@@ -1253,7 +1255,7 @@ def build(
             n_delays.append(neuron)
             # delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],
+                self.source[0],
                 neuron,
                 weights=_delay_mapping_mask(in_h, cin),
                 conn_type=ConnType.All2All,
@@ -1290,6 +1292,7 @@ def __init__(
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
         padding: _Size2Type = 0,
+        *,
         keep_shape: bool = False,
         name: Optional[str] = None,
         **kwargs,
@@ -1393,7 +1396,7 @@ def build(
             n_delays.append(neuron)
             # delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],
+                self.source[0],
                 neuron,
                 weights=_delay_mapping_mask(in_h, cin),
                 conn_type=ConnType.All2All,
@@ -1425,7 +1428,7 @@ def build(
                 n_neg_padding.append(neuron)
                 # delay synapses
                 syn1 = FullConnSyn(
-                    self.module_intf.operands[0],
+                    self.source[0],
                     neuron,
                     weights=_delay_mapping_mask(in_h, cin),
                     conn_type=ConnType.All2All,
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 09bdf247..41e06fdc 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -4,7 +4,7 @@
 from collections.abc import Sequence
 from dataclasses import dataclass, field
 from functools import partial
-from typing import ClassVar, Literal, Optional, TypeVar, Union
+from typing import Callable, ClassVar, Literal, Optional, TypeVar, Union
 
 import numpy as np
 from paicorelib import TM, CoreMode, HwConfig, SNNModeEnable, get_core_mode
@@ -100,6 +100,7 @@ def __init__(
         tick_wait_start: int,
         tick_wait_end: int,
         unrolling_factor: int,
+        keep_shape: bool,
         name: Optional[str] = None,
     ) -> None:
         super().__init__(name)
@@ -108,6 +109,7 @@ def __init__(
         self._tws = tick_wait_start
         self._twe = tick_wait_end
         self._uf = unrolling_factor
+        self.keep_shape = keep_shape
 
     def __call__(self, *args, **kwargs):
         return self.update(*args, **kwargs)
@@ -179,9 +181,7 @@ def __init__(
 
                 op.register_output(self)
 
-        super().__init__(**kwargs, name=name)
-
-        self.keep_shape = keep_shape
+        super().__init__(**kwargs, keep_shape=keep_shape, name=name)
         self._shape_out = shape_out
         self.register_operand(*operands)
 
@@ -198,12 +198,7 @@ def __init__(
         # Set a deque for the `synin` to implement the delay of `inherent_delay` for the module.
         if self.inherent_delay > 0:
             _init_synin = [
-                self.n_op
-                * [
-                    np.zeros(
-                        self.module_intf.operands[0].num_out, dtype=NEUOUT_U8_DTYPE
-                    )
-                ]
+                self.n_op * [np.zeros(self.source[0].num_out, dtype=NEUOUT_U8_DTYPE)]
             ]
         else:
             _init_synin = []
@@ -215,7 +210,7 @@ def __init__(
     def get_inputs(self) -> None:
         synin = []
 
-        for op in self.module_intf.operands:
+        for op in self.source:
             # Retrieve the spike at index `timestamp` of the dest neurons
             if self.is_working():
                 if isinstance(op, InputProj):
@@ -255,7 +250,7 @@ def _rebuild_out_intf(
     ) -> None:
         from .synapses import FullConnectedSyn
 
-        for out in self.module_intf.output:
+        for out in self.target:
             if isinstance(out, FullConnectedSyn):
                 out.source = out_neuron
             else:
@@ -443,7 +438,9 @@ def __init__(
 _T = TypeVar("_T", bound=NeuModule)
 
 
-def set_rt_mode(input_width: L[1, 8], spike_width: L[1, 8], snn_en: L[0, 1]):
+def set_rt_mode(
+    input_width: L[1, 8], spike_width: L[1, 8], snn_en: L[0, 1]
+) -> Callable[[type[_T]], type[_T]]:
     def wrapper(cls: type[_T]) -> type[_T]:
         iw = _input_width_format(input_width)
         sw = _spike_width_format(spike_width)

From cf939cff919f522e41ac8eee535f30c24a42ebd7 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 26 Nov 2024 16:10:54 +0800
Subject: [PATCH 139/187] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(PAIGraph)?=
 =?UTF-8?q?:=20building=20process=20of=20modules,especially=20semi-folded?=
 =?UTF-8?q?=20ops?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py        |  36 +++++--
 paibox/components/_modules.py   |  50 +++++++---
 paibox/components/functional.py | 172 ++++++++++++++------------------
 paibox/network.py               | 119 +++++++++++++---------
 4 files changed, 210 insertions(+), 167 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index b0c1950d..335b95cf 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -8,6 +8,7 @@
 
 from paibox.collector import Collector
 from paibox.components import FullConnectedSyn, InputProj, NeuModule, Neuron
+from paibox.components.functional import LinearSemiFolded
 from paibox.exceptions import GraphBuildError, GraphConnectionError, NotSupportedError
 from paibox.network import DynSysGroup
 from paibox.utils import check_elem_unique
@@ -113,9 +114,32 @@ def _pre_build(self, **build_options) -> None:
         # Check the hardware resource limits of operators in the network during the build phase.
         build_options.setdefault("check_before_compile", True)
 
-        # Build functional modules in the subnets
-        for subnet in self._raw_networks:
-            DynSysGroup.build_fmodule(subnet, **build_options)
+        # Build functional modules for each network.
+        for network in self._raw_networks:
+            if network.is_composed_of_semi_folded_ops():
+                modules = network.components.subset(NeuModule)
+                succ_dg_semi_ops = {
+                    name: [t.name for t in op.target] for name, op in modules.items()
+                }
+                pred_dg_semi_ops = reverse_edges(succ_dg_semi_ops)
+
+                # XXX Networks consisting entirely of semi-folded operators require some additional topology
+                # checks. These additional checks may be removed as more network structures are supported.
+
+                # Currently, `LinearSemiFolded` must be at the end of the network (i.e. have no successor), since it
+                # changes the form of the input data stream, and its effective output is produced at the same time.
+                semi_linears = modules.subset(LinearSemiFolded)
+                if not all(
+                    len(succ_dg_semi_ops[linear]) == 0 for linear in semi_linears
+                ):
+                    raise NotSupportedError(
+                        "currently, the semi-folded linear can only be used as output of the network."
+                    )
+
+                ordered_nodes = [modules[name] for name in toposort(succ_dg_semi_ops)]
+                network.build_modules(pred_dg_semi_ops, ordered_nodes, **build_options)
+            else:
+                network.build_modules(**build_options)
 
     def _update_graph(self, **build_options) -> None:
         self.clear(total=False)
@@ -125,7 +149,7 @@ def _update_graph(self, **build_options) -> None:
             self.succ_dg[node] = dict()
             self.pred_dg[node] = dict()
 
-        for syn in self._raw_edges.values():
+        for name, syn in self._raw_edges.items():
             u, v = syn.source.name, syn.dest.name
             if u not in self._raw_nodes:
                 raise GraphConnectionError(
@@ -138,6 +162,7 @@ def _update_graph(self, **build_options) -> None:
                 )
 
             _edge_attr = EdgeAttr(edge=syn, distance=syn.source.delay_relative)
+            self.edges[name] = _edge_attr
             self.succ_dg[u][v] = _edge_attr
             self.pred_dg[v][u] = _edge_attr
 
@@ -158,9 +183,6 @@ def _update_graph(self, **build_options) -> None:
                 degree=self.degree_of_nodes[name],
             )
 
-        for name, syn in self._raw_edges.items():
-            self.edges[name] = EdgeAttr(edge=syn, distance=syn.source.delay_relative)
-
         self.ordered_nodes = toposort(self.succ_dg)
         self.has_built = True
 
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index a2059b95..e20b6aa6 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,6 +1,7 @@
+from dataclasses import dataclass
 import math
 import typing
-from typing import Literal, Optional, Protocol, Union
+from typing import Literal, Optional, Union
 
 import numpy as np
 from paicorelib import TM, HwConfig
@@ -57,6 +58,7 @@
     "_SpikingPool2dWithV",
     "_SemiFoldedModule",
     "_LinearBase",
+    "SemiFoldedStreamAttr",
 ]
 
 
@@ -159,24 +161,44 @@ class _DelayChainANN(_DelayChainBase):
     pass
 
 
-class _HasSemiFoldedIntf(Protocol):
-    """The front of this module has replication & delay interface for semi-folded operators."""
+@dataclass(frozen=True)
+class SemiFoldedStreamAttr:
+    """Details of the transmission of valid data in a data stream of semi-folded form."""
+
+    t_1st_vld: int
+    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
+    interval: int
+    """The interval of the output data stream."""
+    n_data: int = 0
+    """The number of valid output data."""
+
+    def t_at(self, n: int) -> int:
+        """The time of the n-th valid data."""
+        if self.n_data > 0:
+            assert 1 <= n <= self.n_data
+
+        return self.t_1st_vld + (n - 1) * self.interval
+
+    @property
+    def t_last_vld(self) -> int:
+        """The time of the last valid data."""
+        assert self.n_data > 0
+        return self.t_at(self.n_data)
+
+
+@set_rt_mode_ann()
+class _SemiFoldedModule(FunctionalModule):
+    """Functional modules with interfaces in semi-folded form. Subclasses must implement `build()`."""
+
+    ostream_attr: SemiFoldedStreamAttr
 
     def build(
         self,
         network: "DynSysGroup",
-        valid_interval: int,
-        ts_first_valid_inp: int,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
-    ) -> BuiltComponentType: ...
-
-
-@set_rt_mode_ann()
-class _SemiFoldedModule(FunctionalModule, _HasSemiFoldedIntf):
-    valid_interval: int = 1
-    """The interval of valid output data."""
-    ts_1st_valid_out: int = 0
-    """The timestamp of the first valid output data."""
+    ) -> BuiltComponentType:
+        raise NotImplementedError
 
     def _input_buffer_len_check(
         self, in_channels: int, in_h: int, kw: int, valid_interval: int
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index e39cbd0e..423af93c 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -902,16 +902,16 @@ def build(self, network: "DynSysGroup", **build_options) -> BuiltComponentType:
 class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
     "This operator is used on the first fully-connected layer after the semi-folded convolution."
 
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        raise NotImplementedError
-
     def build(
-        self, network: "DynSysGroup", valid_interval: int, **build_options
+        self,
+        network: "DynSysGroup",
+        incoming_stream_attr: SemiFoldedStreamAttr,
+        **build_options,
     ) -> BuiltComponentType:
-        assert len(self.module_intf.operands[0].shape_out) == 2
-        self.valid_interval = valid_interval
+        assert len(self.source[0].shape_out) == 2
+        self.ostream_attr = incoming_stream_attr
 
-        in_ch, in_h = self.module_intf.operands[0].shape_out
+        ich, ih = self.source[0].shape_out
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
@@ -927,10 +927,10 @@ def build(
             name=f"nd_{self.name}",
         )
 
-        for i in range(in_h):
+        for i in range(ih):
             neuron = ANNBypassNeuron(
-                shape=(in_ch, in_h),
-                delay=valid_interval * i + 1,
+                shape=(ich, ih),
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=self.tick_wait_end,
                 keep_shape=self.keep_shape,
@@ -939,15 +939,15 @@ def build(
             n_delays.append(neuron)
             # Delay synapses
             syn1 = FullConnSyn(
-                self.module_intf.operands[0],
+                self.source[0],
                 neuron,
-                weights=_delay_mapping_mask(in_h, in_ch),
+                weights=_delay_mapping_mask(ih, ich),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
             s_delays.append(syn1)
 
-            w = self.weights[in_h - i - 1 :: in_h, :]
+            w = self.weights[ih - i - 1 :: ih, :]
             syn2 = FullConnSyn(
                 neuron,
                 n_fc,
@@ -1027,37 +1027,31 @@ def __init__(
             neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
         )
 
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        raise NotImplementedError
-
     def build(
         self,
         network: "DynSysGroup",
-        valid_interval: int,
-        ts_first_valid_inp: int,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
         # if len(self.source[0].shape_out) != 2:
         #     in_ch, in_h, in_w = _fm_ndim2_check(
-        #         self.module_intf.operands[0].shape_out, "CHW"
+        #         self.source[0].shape_out, "CHW"
         #     )
-        #     self.module_intf.operands[0].shape_change((in_ch, in_h))
-        self.valid_interval = valid_interval
-        _, in_h = self.module_intf.operands[0].shape_out
+        #     self.source[0].shape_change((in_ch, in_h))
+        _, ih = self.source[0].shape_out
         _, cin, _, kw = self.kernel.shape
+        _, ow = self.shape_out
 
-        self.ts_1st_valid_out = (
-            ts_first_valid_inp + (kw - 1 - self.padding[0]) * valid_interval
-        )
-        twe = (
-            1
-            + self.ts_1st_valid_out
-            + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        self.ostream_attr = SemiFoldedStreamAttr(
+            incoming_stream_attr.t_at(kw - self.padding[0]),
+            incoming_stream_attr.interval * self.stride[1],
+            ow,
         )
+        twe = 1 + self.ostream_attr.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, in_h, kw, valid_interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
         n_delays = NodeList()
         n_neg_padding = NodeList()
@@ -1078,11 +1072,10 @@ def build(
 
         for i in range(kw):
             neuron = ANNBypassNeuron(
-                (cin, in_h),
-                delay=valid_interval * i + 1,
+                (cin, ih),
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=twe,
-                keep_shape=self.keep_shape,
                 name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
@@ -1090,7 +1083,7 @@ def build(
             syn1 = FullConnSyn(
                 self.source[0],
                 neuron,
-                weights=_delay_mapping_mask(in_h, cin),
+                weights=_delay_mapping_mask(ih, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1108,15 +1101,15 @@ def build(
             s_kernel.append(syn2)
 
         # Add additional negative padding layer to eliminate the incorrect output
-        # NOTE: ts_first_valid_inp = 0 & padding[0] > 0 means the previous layer is
+        # NOTE: `t_1st_vld` = 0 & `padding[0]` > 0 means the previous layer is
         # an input node. No need to add negative padding layer for this case.
-        if ts_first_valid_inp > 0:
+        if incoming_stream_attr.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
-                    (cin, in_h),
-                    delay=valid_interval * (kw - 1 - p) + 1,
+                    (cin, ih),
+                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=ts_first_valid_inp,
+                    tick_wait_end=incoming_stream_attr.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1125,7 +1118,7 @@ def build(
                 syn1 = FullConnSyn(
                     self.source[0],
                     neuron,
-                    weights=_delay_mapping_mask(in_h, cin),
+                    weights=_delay_mapping_mask(ih, cin),
                     conn_type=ConnType.All2All,
                     name=f"s{p}_pad_{self.name}",
                 )
@@ -1198,37 +1191,31 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        raise NotImplementedError
-
     def build(
         self,
         network: "DynSysGroup",
-        valid_interval: int,
-        ts_first_valid_inp: int,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
-        assert len(self.module_intf.operands[0].shape_out) == 2
-        # if len(self.module_intf.operands[0].shape_out) != 2:
+        assert len(self.source[0].shape_out) == 2
+        # if len(self.source[0].shape_out) != 2:
         #     in_ch, in_h, in_w = _fm_ndim2_check(
-        #         self.module_intf.operands[0].shape_out, "CHW"
+        #         self.source[0].shape_out, "CHW"
         #     )
-        #     self.module_intf.operands[0].shape_change((in_ch, in_h))
-        self.valid_interval = valid_interval
-
-        in_ch, in_h = self.module_intf.operands[0].shape_out
-        cin = in_ch
-        _, kw = self.kernel_size
-
-        self.ts_1st_valid_out = ts_first_valid_inp + (kw - 1) * valid_interval
-        twe = (
-            1
-            + self.ts_1st_valid_out
-            + (self.shape_out[1] - 1) * valid_interval * self.stride[1]
+        #     self.source[0].shape_change((in_ch, in_h))
+        cin, ih = self.source[0].shape_out
+        kh, kw = self.kernel_size
+        _, ow = self.shape_out
+
+        self.ostream_attr = SemiFoldedStreamAttr(
+            incoming_stream_attr.t_at(kw),
+            incoming_stream_attr.interval * self.stride[1],
+            ow,
         )
+        twe = 1 + self.ostream_attr.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, in_h, kw, valid_interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
         n_delays = NodeList()
         s_delays = NodeList()
@@ -1245,8 +1232,8 @@ def build(
 
         for i in range(kw):
             neuron = ANNBypassNeuron(
-                (cin, in_h),
-                delay=valid_interval * i + 1,
+                (cin, ih),
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=twe,
                 keep_shape=self.keep_shape,
@@ -1257,7 +1244,7 @@ def build(
             syn1 = FullConnSyn(
                 self.source[0],
                 neuron,
-                weights=_delay_mapping_mask(in_h, cin),
+                weights=_delay_mapping_mask(ih, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1266,12 +1253,7 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
-                    cin,
-                    in_h,
-                    self.shape_out[1],
-                    self.kernel_size[0],
-                    self.stride,
-                    (0, 0),
+                    cin, ih, ow, kh, self.stride, (0, 0)
                 ),
                 name=f"s{i}_{self.name}",
             )
@@ -1327,35 +1309,31 @@ def __init__(
             **kwargs,
         )
 
-    def spike_func(self, x1: NeuOutType, **kwargs) -> NeuOutType:
-        raise NotImplementedError
-
     def build(
         self,
         network: "DynSysGroup",
-        valid_interval: int,
-        ts_first_valid_inp: int,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
-        assert len(self.module_intf.operands[0].shape_out) == 2
-        # if len(self.module_intf.operands[0].shape_out) != 2:
+        assert len(self.source[0].shape_out) == 2
+        # if len(self.source[0].shape_out) != 2:
         #     in_ch, in_h, in_w = _fm_ndim2_check(
-        #         self.module_intf.operands[0].shape_out, "CHW"
+        #         self.source[0].shape_out, "CHW"
         #     )
-        #     self.module_intf.operands[0].shape_change((in_ch, in_h))
-        self.valid_interval = valid_interval
-        in_ch, in_h = self.module_intf.operands[0].shape_out
-        cin = in_ch
+        #     self.source[0].shape_change((in_ch, in_h))
+        cin, ih = self.source[0].shape_out
         kh, kw = self.kernel_size
-        out_h = self.shape_out[1]
+        _, ow = self.shape_out
 
-        self.ts_1st_valid_out = (
-            ts_first_valid_inp + (kw - 1 - self.padding[0]) * valid_interval
+        self.ostream_attr = SemiFoldedStreamAttr(
+            incoming_stream_attr.t_at(kw - self.padding[0]),
+            incoming_stream_attr.interval * self.stride[1],
+            ow,
         )
-        twe = 1 + self.ts_1st_valid_out + (out_h - 1) * valid_interval * self.stride[1]
+        twe = 1 + self.ostream_attr.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, in_h, kw, valid_interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
         # NOTE: Division is achieved with the help of output truncation.
         # TODO Since division with a divisor that is an integer power of 2 can only be implemented by
@@ -1386,8 +1364,8 @@ def build(
         )
         for i in range(kw):
             neuron = ANNBypassNeuron(
-                (cin, in_h),
-                delay=valid_interval * i + 1,
+                (cin, ih),
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=twe,
                 keep_shape=self.keep_shape,
@@ -1398,7 +1376,7 @@ def build(
             syn1 = FullConnSyn(
                 self.source[0],
                 neuron,
-                weights=_delay_mapping_mask(in_h, cin),
+                weights=_delay_mapping_mask(ih, cin),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1407,7 +1385,7 @@ def build(
                 neuron,
                 pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
-                    cin, in_h, out_h, kh, self.stride, self.padding
+                    cin, ih, ow, kh, self.stride, self.padding
                 ),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
@@ -1415,13 +1393,13 @@ def build(
             s_delays.append(syn2)
 
         # Add additional negative padding layer to eliminate the incorrect output
-        if ts_first_valid_inp > 0:
+        if incoming_stream_attr.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
-                    (cin, in_h),
-                    delay=valid_interval * (kw - 1 - p) + 1,
+                    (cin, ih),
+                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=ts_first_valid_inp,
+                    tick_wait_end=incoming_stream_attr.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1430,7 +1408,7 @@ def build(
                 syn1 = FullConnSyn(
                     self.source[0],
                     neuron,
-                    weights=_delay_mapping_mask(in_h, cin),
+                    weights=_delay_mapping_mask(ih, cin),
                     conn_type=ConnType.All2All,
                     name=f"s{p}_pad_{self.name}",
                 )
@@ -1440,7 +1418,7 @@ def build(
                     neuron,
                     pool2d,
                     weights=-_poo2d_semifolded_mapping_mask(
-                        cin, in_h, out_h, kh, self.stride, self.padding
+                        cin, ih, ow, kh, self.stride, self.padding
                     ),
                     conn_type=ConnType.All2All,
                     name=f"neg_s{i}_{self.name}",
diff --git a/paibox/network.py b/paibox/network.py
index e23defd1..4f25a48c 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -1,17 +1,15 @@
+from collections.abc import Sequence
 import sys
 from typing import Optional, Union
 
 import numpy as np
 
+from .exceptions import NotSupportedError
+
 from .base import DynamicSys, SynSys
 from .collector import Collector
+from .components._modules import _SemiFoldedModule, SemiFoldedStreamAttr
 from .components import NeuModule, Neuron, Projection
-from .components.functional import (
-    AvgPool2dSemiFolded,
-    Conv2dSemiFolded,
-    LinearSemiFolded,
-    MaxPool2dSemiFolded,
-)
 from .components.modules import BuiltComponentType
 from .mixin import Container
 from .node import NodeDict, NodeList
@@ -81,39 +79,65 @@ def reset_state(self) -> None:
     def __call__(self, **kwargs) -> None:
         return self.update(**kwargs)
 
-    @classmethod
-    def build_fmodule(
-        cls, network: "DynSysGroup", **build_options
+    def build_modules(
+        self,
+        pred_dg_semi_ops: Optional[dict[str, list[str]]] = None,
+        ordered_semi_ops: Optional[list[NeuModule]] = None,
+        **build_options,
     ) -> dict[NeuModule, BuiltComponentType]:
+        """Build the functional modules in the network.
+
+        Args:
+            pred_dg_semi_ops (dict[str, list[str]], None): The predecessor directed graph of semi-folded operators.
+            ordered_semi_ops (list[NeuModule], None): The ordered semi-folded operators.
+
+        Returns:
+            built_components (dict[NeuModule, BuiltComponentType]): The dictionary of generated basic components after building.
+        """
+        if pred_dg_semi_ops is not None and ordered_semi_ops is not None:
+            # It is the network composed of all semi-folded operators.
+            modules = ordered_semi_ops
+        else:
+            # It is the network composed of general operators.
+            modules = list(self.components.subset(NeuModule).unique().values())
+
         generated = dict()
-        modules = network.nodes().subset(NeuModule).unique()
-
-        # Valid interval for semi-folded components
-        # If the input data is input continuously on the W-axis, the initial
-        # valid interval for the first semi-folded component is 1.
-        semi_valid_interval = 1
-        ts_1st_valid_out = 0
-
-        for module in modules.values():
-            if isinstance(
-                module, (Conv2dSemiFolded, MaxPool2dSemiFolded, AvgPool2dSemiFolded)
-            ):
-                generated[module] = module.build(
-                    network, semi_valid_interval, ts_1st_valid_out, **build_options
-                )
-                semi_valid_interval *= module.stride[1]
-                ts_1st_valid_out = module.ts_1st_valid_out
-            elif isinstance(module, LinearSemiFolded):
-                generated[module] = module.build(
-                    network, semi_valid_interval, **build_options
-                )
-            else:
-                generated[module] = module.build(network, **build_options)
 
-        network._remove_modules_from_containers(network, modules)
+        # For external input stream info:
+        # 1. The start time is 1
+        # 2. The interval is 1
+        # 3. The #N of data is -1 since it doesn't affect the subsequent output stream.
+        # TODO Reserve an interface for setting the properties of external input from `FRONTEND_ENV`?
+        last_vld_output_attr = SemiFoldedStreamAttr(0, 1)
+
+        for m in modules:
+            # TODO for the case of the ResBlock, the `pred_dg_semi_ops` will be used.
+            if isinstance(m, _SemiFoldedModule):
+                generated[m] = m.build(self, last_vld_output_attr, **build_options)
+                last_vld_output_attr = m.ostream_attr
+            else:
+                generated[m] = m.build(self, **build_options)
 
+        self._remove_modules(modules)
         return generated
 
+    def is_composed_of_semi_folded_ops(self) -> bool:
+        """Check if the network consists entirely or not of semi-folded operators. Return true if all the \
+            components are semi-folded operators. Return false if all the components are not semi-folded. \
+            In other cases, an exception will be raised.
+        """
+        if all(isinstance(cpn, _SemiFoldedModule) for cpn in self.components.values()):
+            return True
+        elif not all(
+            isinstance(cpn, _SemiFoldedModule) for cpn in self.components.values()
+        ):
+            return False
+        else:
+            # XXX It seems that there will be no network mixed with semi-folded operators at present.
+            raise NotSupportedError(
+                "mixed semi-folded & normal operators in the network is not supported."
+            )
+
     def _add_components(self, *implicit: DynamicSys, **explicit: DynamicSys) -> None:
         """Add new components. When the component is passed in explicitly, its tag name can \
             be specified. When passing in implicitly, its attribute `.name` will be used.
@@ -141,22 +165,19 @@ def _ignore_components(self, *components: DynamicSys) -> None:
             if cpn in self.__dict__.values():
                 cpn.__gh_build_ignore__ = True
 
-    @staticmethod
-    def _remove_modules_from_containers(
-        network: "DynSysGroup", modules: Collector[str, NeuModule]
-    ) -> None:
-        """Remove the built modules from the node containers of the network."""
-        node_lists = [v for v in network.__dict__.values() if isinstance(v, NodeList)]
-        node_dicts = [v for v in network.__dict__.values() if isinstance(v, NodeDict)]
-
-        for module in modules.values():
-            for lst in node_lists:
-                if module in lst:
-                    lst.remove(module)
-
-            for dct in node_dicts:
-                if module in dct.values():
-                    dct.pop(module)
+    def _remove_modules(self, modules: Sequence[NeuModule]) -> None:
+        """Remove the built modules from the network."""
+        node_lst = [v for v in self.__dict__.values() if isinstance(v, NodeList)]
+        node_dct = [v for v in self.__dict__.values() if isinstance(v, NodeDict)]
+
+        for m in modules:
+            for lst in node_lst:
+                if m in lst:
+                    lst.remove(m)
+
+            for dct in node_dct:
+                if m in dct.values():
+                    dct.pop(m)
 
     @property
     def components(self) -> Collector[str, DynamicSys]:

From 2498cae93e8ce01707b674198a268b29bf50c096 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 26 Nov 2024 16:16:15 +0800
Subject: [PATCH 140/187] =?UTF-8?q?=E2=9C=85=20test(functional):=20sync=20?=
 =?UTF-8?q?changes=20of=20semi-folded=20ops=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/components/test_functional.py | 102 ++++++++++++++--------------
 1 file changed, 50 insertions(+), 52 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 15df0663..c2c63a82 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -31,7 +31,7 @@ def _assert_build_fmodule(
     assert len(nodes) == n_node_bef_build
 
     # Construct the functional modules
-    DynSysGroup.build_fmodule(network)
+    network.build_modules()
 
     # Must exclude `NeuModule`, because it may be in the `__dict__` of probe
     nodes = network.nodes().subset(DynamicSys).exclude(NeuModule).unique()
@@ -106,7 +106,7 @@ def test_BitwiseAND(self):
         net2 = FunctionalModule_2to1_Net("and")
         bitwise = net1.bitwise
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -147,7 +147,7 @@ def test_BitwiseNOT(self):
         net2 = FunctionalModule_1to1_Net("not")
         bitwise = net1.bitwise
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -187,7 +187,7 @@ def test_BitwiseOR(self):
         net2 = FunctionalModule_2to1_Net("or")
         bitwise = net1.bitwise
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -228,7 +228,7 @@ def test_BitwiseXOR(self):
         net2 = FunctionalModule_2to1_Net("xor")
         bitwise = net1.bitwise
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -269,7 +269,7 @@ def test_DelayChain(self):
         net2 = FunctionalModule_1to1_Net("delay")
         bitwise = net1.bitwise
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -309,7 +309,7 @@ def test_SpikingAdd(self):
         net1 = FunctionalModule_2to1_Net("add")
         net2 = FunctionalModule_2to1_Net("add")
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -360,7 +360,7 @@ def test_SpikingSub(self):
         net1 = FunctionalModule_2to1_Net("sub")
         net2 = FunctionalModule_2to1_Net("sub")
         func = net2.func_node
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -448,7 +448,7 @@ def test_SpikingPool1d(
         net1 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
         net2 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
         p1d = net2.pool
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -558,7 +558,7 @@ def test_SpikingPool2d(
         net1 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
         net2 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, pool_type)
         p2d = net2.pool
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -642,7 +642,7 @@ def test_SpikingAvgPool1dWithV(
         net1 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
         net2 = SpikingPool1d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
         p1d = net2.pool
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -704,7 +704,7 @@ def test_SpikingAvgPool2dWithV(
         net1 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
         net2 = SpikingPool2d_Net(fm_shape, ksize, stride, padding, threshold, "avgv")
         p2d = net2.pool
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -742,7 +742,7 @@ def test_Transpose2d(self, shape):
         net1 = TransposeModule_T2d_Net(shape)
         net2 = TransposeModule_T2d_Net(shape)
         t2d = net2.t2d
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -792,7 +792,7 @@ def test_Transpose3d(self, shape, axes):
         net1 = TransposeModule_T3d_Net(shape, axes)
         net2 = TransposeModule_T3d_Net(shape, axes)
         t3d = net2.t3d
-        generated = DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 
@@ -830,7 +830,6 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.export(fp=ensure_dump_dir)
 
     @pytest.mark.parametrize(
-        # NOTE: Only support padding in the first semi-folded conv2d for now.
         "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features",
         [
             # n_conv = 1
@@ -965,7 +964,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         # `net1.conv_list` will be removed in `build_fmodule`
         conv2d_list = net1.conv_list.copy()
         linear = net1.linear1
-        generated = DynSysGroup.build_fmodule(net1)
+        generated = net1.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
 
         probe_conv_list = []
@@ -978,20 +977,23 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         sim1.add_probe(probe_linear)
 
         semi_folded_modules = [*conv2d_list, linear]
-        semi_valid_interval = []
-        for m in semi_folded_modules:
-            semi_valid_interval.append(m.valid_interval)
-
-        ts_1st_valid = [0] * n_conv
+        # The interval & the time of the first valid data of the external input data stream
+        semi_vld_out_intv0 = 1
+        t_1st_vld_data0 = 0
+        # The interval & the time of the first valid data of the current layers
+        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
+        t_1st_vld_data = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
-                ts_1st_valid[i] = (
-                    kshape_oihw[0][-1] - paddings[0][0]
-                ) * semi_valid_interval[0]
+                t_1st_vld_data[i] = (
+                    t_1st_vld_data0
+                    + (kshape_oihw[0][-1] - paddings[0][0]) * semi_vld_out_intv0
+                )
             else:
-                ts_1st_valid[i] = (
-                    ts_1st_valid[i - 1]
-                    + (kshape_oihw[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
+                t_1st_vld_data[i] = (
+                    t_1st_vld_data[i - 1]
+                    + (kshape_oihw[i][-1] - 1 - paddings[i][0])
+                    * semi_vld_out_intv[i - 1]
                 )
 
         n_test = 3  # can be more
@@ -1034,8 +1036,8 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                         x[:, :, i].ravel(),
                         sim1.data[probe_conv_list[i_conv]][
                             conv2d_list[i_conv].tick_wait_start
-                            + ts_1st_valid[i_conv]
-                            + i * semi_valid_interval[i_conv + 1]
+                            + t_1st_vld_data[i_conv]
+                            + i * semi_vld_out_intv[i_conv]
                             - 1
                         ],
                     )
@@ -1047,10 +1049,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start
-                    + ts_1st_valid[-1]
-                    + (ows[-1] - 1) * semi_valid_interval[-1]
-                    - 1
+                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
                 ],
             )
 
@@ -1160,7 +1159,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         # `net1.pool_list` will be removed in `build_fmodule`
         pool2d_list = net1.pool_list.copy()
         linear = net1.linear1
-        generated = DynSysGroup.build_fmodule(net1)
+        generated = net1.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
 
         probe_pool_list = []
@@ -1173,20 +1172,22 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         sim1.add_probe(probe_linear)
 
         semi_folded_modules = [*pool2d_list, linear]
-        semi_valid_interval = []
-        for m in semi_folded_modules:
-            semi_valid_interval.append(m.valid_interval)
-
-        ts_1st_valid = [0] * n_pool
+        # The interval & the time of the first valid data of the external input data stream
+        semi_vld_out_intv0 = 1
+        t_1st_vld_data0 = 0
+        # The interval & the time of the first valid data of the current layers
+        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
+        t_1st_vld_data = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
-                ts_1st_valid[i] = (
-                    ksizes[0][-1] - paddings[0][0]
-                ) * semi_valid_interval[0]
+                t_1st_vld_data[i] = (
+                    t_1st_vld_data0
+                    + (ksizes[i][-1] - paddings[i][0]) * semi_vld_out_intv0
+                )
             else:
-                ts_1st_valid[i] = (
-                    ts_1st_valid[i - 1]
-                    + (ksizes[i][-1] - 1 - paddings[i][0]) * semi_valid_interval[i]
+                t_1st_vld_data[i] = (
+                    t_1st_vld_data[i - 1]
+                    + (ksizes[i][-1] - 1 - paddings[i][0]) * semi_vld_out_intv[i - 1]
                 )
 
         n_test = 3  # can be more
@@ -1217,8 +1218,8 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                         x[:, :, i].ravel(),
                         sim1.data[probe_pool_list[i_pool]][
                             pool2d_list[i_pool].tick_wait_start
-                            + ts_1st_valid[i_pool]
-                            + i * semi_valid_interval[i_pool + 1]
+                            + t_1st_vld_data[i_pool]
+                            + i * semi_vld_out_intv[i_pool]
                             - 1
                         ],
                     )
@@ -1230,10 +1231,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start
-                    + ts_1st_valid[-1]
-                    + (ows[-1] - 1) * semi_valid_interval[-1]
-                    - 1
+                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
                 ],
             )
 
@@ -1250,7 +1248,7 @@ def test_Linear(self, shape, weight):
         net1 = Linear_Net(shape, weight)
         net2 = Linear_Net(shape, weight)
         linear = net2.linear1
-        generated = pb.DynSysGroup.build_fmodule(net2)
+        generated = net2.build_modules()
         sim1 = pb.Simulator(net1, start_time_zero=False)
         sim2 = pb.Simulator(net2, start_time_zero=False)
 

From 3d5b37a4160fdda369d4b1de7e4c32a99250ee65 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 26 Nov 2024 16:16:47 +0800
Subject: [PATCH 141/187] =?UTF-8?q?=E2=9C=85=20test(onboard):=20sync=20cha?=
 =?UTF-8?q?nges=20for=20on-board=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/__init__.py             |  4 --
 tests/onboard/README.md       | 40 ++++++++--------
 tests/onboard/test_onboard.py | 88 +++++++++++++++++------------------
 3 files changed, 64 insertions(+), 68 deletions(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index 9cbd67a8..e69de29b 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,4 +0,0 @@
-import os
-import sys
-
-sys.path.append(os.getcwd())
diff --git a/tests/onboard/README.md b/tests/onboard/README.md
index 65e86190..2d1f0104 100644
--- a/tests/onboard/README.md
+++ b/tests/onboard/README.md
@@ -2,8 +2,8 @@
 
 ## ANN权重映射
 
-|         测试项目          | 结果 |    备注    |
-| :-----------------------: | :--: | :--------: |
+|        测试项目        | 结果 |    备注    |
+| :--------------------: | :--: | :--------: |
 | [001 单层](#001-单层w8e1) |  ✅  |            |
 | [002 单层](#002-单层w8e4) |  ✅  |            |
 | [003 单层](#003-单层w2e2) |  ✅  |            |
@@ -124,8 +124,8 @@
 
 ## SNN算子
 
-|         测试项目          | 结果 | 备注 |
-| :-----------------------: | :--: | :--: |
+|        测试项目        | 结果 | 备注 |
+| :--------------------: | :--: | :--: |
 | [001 Conv1d](#001-conv1d) |  ✅  |      |
 
 ### Conv1d
@@ -145,22 +145,22 @@
 
 ## 半折叠算子
 
-|                      测试项目                       | 结果 |    备注    |
-| :-------------------------------------------------: | :--: | :--------: |
-|    [001 Conv2dSemiFolded](#001-conv2dsemifolded)    |  ✅  |            |
-|    [002 Conv2dSemiFolded](#002-conv2dsemifolded)    |  ❌  | 不完全相等 |
-|    [003 Conv2dSemiFolded](#003-conv2dsemifolded)    |      |            |
-|    [004 Conv2dSemiFolded](#004-conv2dsemifolded)    |      |            |
-|    [005 Conv2dSemiFolded](#005-conv2dsemifolded)    |      |            |
-|    [006 Conv2dSemiFolded](#006-conv2dsemifolded)    |  ❌  | 不完全相等 |
-|    [007 Conv2dSemiFolded](#007-conv2dsemifolded)    |      |            |
-|    [008 Conv2dSemiFolded](#008-conv2dsemifolded)    |      |            |
-|    [009 Conv2dSemiFolded](#009-conv2dsemifolded)    |      |            |
-| [010 MaxPool2dSemiFolded](#010-maxpool2dsemifolded) |      |            |
-| [011 AvgPool2dSemiFolded](#011-avgpool2dsemifolded) |  ✅  |            |
-| [012 Conv2dSemiFoldedNet](#012-conv2dsemifoldednet) |      |            |
-| [013 Conv2dSemiFoldedNet](#013-conv2dsemifoldednet) |      |            |
-|    [014 CNNSemiFoldedNet](#014-cnnsemifoldednet)    |      |            |
+|                     测试项目                     | 结果 |                 备注                 |
+| :-----------------------------------------------: | :--: | :----------------------------------: |
+|    [001 Conv2dSemiFolded](#001-conv2dsemifolded)    |  ✅  |                                      |
+|    [002 Conv2dSemiFolded](#002-conv2dsemifolded)    |  ✅  |                                      |
+|    [003 Conv2dSemiFolded](#003-conv2dsemifolded)    |  ✅  |                                      |
+|    [004 Conv2dSemiFolded](#004-conv2dsemifolded)    |  ✅  | 仅错在前2、3时间步，本身就是无效数据 |
+|    [005 Conv2dSemiFolded](#005-conv2dsemifolded)    |  ✅  |                                      |
+|    [006 Conv2dSemiFolded](#006-conv2dsemifolded)    |  ✅  |                                      |
+|    [007 Conv2dSemiFolded](#007-conv2dsemifolded)    |  ✅  |                                      |
+|    [008 Conv2dSemiFolded](#008-conv2dsemifolded)    |  ✅  |                                      |
+|    [009 Conv2dSemiFolded](#009-conv2dsemifolded)    |  ✅  |                                      |
+| [010 MaxPool2dSemiFolded](#010-maxpool2dsemifolded) |  ❌  |                                      |
+| [011 AvgPool2dSemiFolded](#011-avgpool2dsemifolded) |  ✅  |                                      |
+| [012 Conv2dSemiFoldedNet](#012-conv2dsemifoldednet) |  ❌  |                                      |
+| [013 Conv2dSemiFoldedNet](#013-conv2dsemifoldednet) |  ✅  |                                      |
+|    [014 CNNSemiFoldedNet](#014-cnnsemifoldednet)    |  ✅  |                                      |
 
 ### 单层
 
diff --git a/tests/onboard/test_onboard.py b/tests/onboard/test_onboard.py
index d18f094f..9e51b0f2 100644
--- a/tests/onboard/test_onboard.py
+++ b/tests/onboard/test_onboard.py
@@ -792,7 +792,7 @@ def __init__(self, w1, w2):
 
 
 class TestOnBoard_SpikingOp:
-    def test_Conv1d_001(self):
+    def test_001_Conv1d(self):
         class Net001(pb.Network):
             def __init__(self, w1):
                 super().__init__()
@@ -803,7 +803,7 @@ def __init__(self, w1):
                 self.p1 = pb.Probe(self.n1, "feature_map")
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv1d_001.__name__
+        TEST_NAME = self.test_001_Conv1d.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -880,7 +880,7 @@ def __init__(self, w1):
 
 
 class TestOnBoard_SemiFoldedOp:
-    def test_Conv2dSemiFolded_001(self):
+    def test_001_Conv2dSemiFolded(self):
         class Net001(pb.DynSysGroup):
             def __init__(self, w1):
                 super().__init__()
@@ -888,7 +888,7 @@ def __init__(self, w1):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w1, 1, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_001.__name__
+        TEST_NAME = self.test_001_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -929,7 +929,7 @@ def __init__(self, w1):
 
         network = Net001(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -959,7 +959,7 @@ def __init__(self, w1):
 
     # 对比test002-005系列
     # weight正常
-    def test_Conv2dSemiFolded_002(self):
+    def test_002_Conv2dSemiFolded(self):
         class Net002(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -967,7 +967,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_002.__name__
+        TEST_NAME = self.test_002_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1009,7 +1009,7 @@ def __init__(self, w2):
 
         network = Net002(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1038,7 +1038,7 @@ def __init__(self, w2):
         print(f"Test {TEST_NAME} end")
 
     # weight全为1
-    def test_Conv2dSemiFolded_003(self):
+    def test_003_Conv2dSemiFolded(self):
         class Net003(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1046,7 +1046,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_003.__name__
+        TEST_NAME = self.test_003_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1088,7 +1088,7 @@ def __init__(self, w2):
 
         network = Net003(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1117,7 +1117,7 @@ def __init__(self, w2):
         print(f"Test {TEST_NAME} end")
 
     # 扇入扩展， weight全正1
-    def test_Conv2dSemiFolded_004(self):
+    def test_004_Conv2dSemiFolded(self):
         class Net004(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1125,7 +1125,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_004.__name__
+        TEST_NAME = self.test_004_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1168,7 +1168,7 @@ def __init__(self, w2):
 
         network = Net004(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1197,7 +1197,7 @@ def __init__(self, w2):
         print(f"Test {TEST_NAME} end")
 
     # 扇入扩展
-    def test_Conv2dSemiFolded_005(self):
+    def test_005_Conv2dSemiFolded(self):
         class Net005(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1205,7 +1205,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 2, 0, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_005.__name__
+        TEST_NAME = self.test_005_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1248,7 +1248,7 @@ def __init__(self, w2):
 
         network = Net005(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1277,7 +1277,7 @@ def __init__(self, w2):
         print(f"Test {TEST_NAME} end")
 
     # 对比006-009
-    def test_Conv2dSemiFolded_006(self):
+    def test_006_Conv2dSemiFolded(self):
         class Net006(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1285,7 +1285,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_006.__name__
+        TEST_NAME = self.test_006_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1328,7 +1328,7 @@ def __init__(self, w2):
 
         network = Net006(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1356,7 +1356,7 @@ def __init__(self, w2):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_Conv2dSemiFolded_007(self):
+    def test_007_Conv2dSemiFolded(self):
         class Net007(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1364,7 +1364,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_007.__name__
+        TEST_NAME = self.test_007_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1408,7 +1408,7 @@ def __init__(self, w2):
 
         network = Net007(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1436,7 +1436,7 @@ def __init__(self, w2):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_Conv2dSemiFolded_008(self):
+    def test_008_Conv2dSemiFolded(self):
         class Net008(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1444,7 +1444,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_008.__name__
+        TEST_NAME = self.test_008_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1488,7 +1488,7 @@ def __init__(self, w2):
 
         network = Net008(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1516,7 +1516,7 @@ def __init__(self, w2):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_Conv2dSemiFolded_009(self):
+    def test_009_Conv2dSemiFolded(self):
         class Net009(pb.DynSysGroup):
             def __init__(self, w2):
                 super().__init__()
@@ -1524,7 +1524,7 @@ def __init__(self, w2):
                 self.conv1 = pb.Conv2dSemiFolded(self.i1, w2, 1, 1, tick_wait_start=1)
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFolded_009.__name__
+        TEST_NAME = self.test_009_Conv2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1565,7 +1565,7 @@ def __init__(self, w2):
 
         network = Net009(weight1)
         conv2d = network.conv1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[conv2d][0], "output")
         sim.add_probe(probe)
@@ -1593,7 +1593,7 @@ def __init__(self, w2):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_MaxPool2dSemiFolded_010(self):
+    def test_010_MaxPool2dSemiFolded(self):
         class Net010(pb.DynSysGroup):
             def __init__(self, ksize):
                 super().__init__()
@@ -1603,7 +1603,7 @@ def __init__(self, ksize):
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_MaxPool2dSemiFolded_010.__name__
+        TEST_NAME = self.test_010_MaxPool2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1643,7 +1643,7 @@ def __init__(self, ksize):
 
         network = Net010(ksize)
         pool = network.pool1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[pool][0], "output")
         sim.add_probe(probe)
@@ -1669,7 +1669,7 @@ def __init__(self, ksize):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_AvgPool2dSemiFolded_011(self):
+    def test_011_AvgPool2dSemiFolded(self):
         class Net011(pb.DynSysGroup):
             def __init__(self, ksize):
                 super().__init__()
@@ -1679,7 +1679,7 @@ def __init__(self, ksize):
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_AvgPool2dSemiFolded_011.__name__
+        TEST_NAME = self.test_011_AvgPool2dSemiFolded.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1719,7 +1719,7 @@ def __init__(self, ksize):
 
         network = Net011(ksize)
         pool = network.pool1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe = pb.Probe(generated[pool][0], "output")
         sim.add_probe(probe)
@@ -1748,7 +1748,7 @@ def __init__(self, ksize):
     @pytest.mark.xfail(
         reason="A ValidationError will be raised due to the backend not support."
     )
-    def test_Conv2dSemiFoldedNet_012(self):
+    def test_012_Conv2dSemiFoldedNet(self):
         class Net012(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
                 super().__init__()
@@ -1764,7 +1764,7 @@ def __init__(self, w1, w2, w3):
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFoldedNet_012.__name__
+        TEST_NAME = self.test_012_Conv2dSemiFoldedNet.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1811,7 +1811,7 @@ def __init__(self, w1, w2, w3):
         conv2d1 = network.conv1
         conv2d2 = network.conv2
         linear = network.linear1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe1 = pb.Probe(generated[conv2d1][0], "output")
         probe2 = pb.Probe(generated[conv2d2][0], "output")
@@ -1849,7 +1849,7 @@ def __init__(self, w1, w2, w3):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_Conv2dSemiFoldedNet_013(self):
+    def test_013_Conv2dSemiFoldedNet(self):
         class Net013(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
                 super().__init__()
@@ -1863,7 +1863,7 @@ def __init__(self, w1, w2, w3):
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_Conv2dSemiFoldedNet_013.__name__
+        TEST_NAME = self.test_013_Conv2dSemiFoldedNet.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -1909,7 +1909,7 @@ def __init__(self, w1, w2, w3):
         conv2d1 = network.conv1
         conv2d2 = network.conv2
         linear = network.linear1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe1 = pb.Probe(generated[conv2d1][0], "output")
         probe2 = pb.Probe(generated[conv2d2][0], "output")
@@ -1947,7 +1947,7 @@ def __init__(self, w1, w2, w3):
 
         print(f"Test {TEST_NAME} end")
 
-    def test_CNNSemiFoldedNet_014(self):
+    def test_014_CNNSemiFoldedNet(self):
         class Net014(pb.DynSysGroup):
             def __init__(self, w1, w2, w3):
                 super().__init__()
@@ -1968,7 +1968,7 @@ def __init__(self, w1, w2, w3):
                 )
 
         USE_EXISTING_DATA = False
-        TEST_NAME = self.test_CNNSemiFoldedNet_014.__name__
+        TEST_NAME = self.test_014_CNNSemiFoldedNet.__name__
         TEST_CASE_DIR = DATA_DIR / TEST_NAME
         CONFIG_CASE_DIR = CONFIG_DIR / TEST_NAME
         if not TEST_CASE_DIR.exists():
@@ -2015,7 +2015,7 @@ def __init__(self, w1, w2, w3):
         conv2d1 = network.conv1
         conv2d2 = network.conv2
         linear = network.linear1
-        generated = pb.DynSysGroup.build_fmodule(network)
+        generated = network.build_modules()
         sim = pb.Simulator(network, start_time_zero=False)
         probe1 = pb.Probe(generated[conv2d1][0], "output")
         probe2 = pb.Probe(generated[conv2d2][0], "output")

From b6a7ac63ec66fa268913b561d1f41409d6ad3e91 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 26 Nov 2024 08:57:56 +0000
Subject: [PATCH 142/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/_modules.py |  2 +-
 paibox/network.py             |  7 +++----
 tests/onboard/README.md       | 12 ++++++------
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index e20b6aa6..ea22fbe3 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,6 +1,6 @@
-from dataclasses import dataclass
 import math
 import typing
+from dataclasses import dataclass
 from typing import Literal, Optional, Union
 
 import numpy as np
diff --git a/paibox/network.py b/paibox/network.py
index 4f25a48c..4f0a4c12 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -1,16 +1,15 @@
-from collections.abc import Sequence
 import sys
+from collections.abc import Sequence
 from typing import Optional, Union
 
 import numpy as np
 
-from .exceptions import NotSupportedError
-
 from .base import DynamicSys, SynSys
 from .collector import Collector
-from .components._modules import _SemiFoldedModule, SemiFoldedStreamAttr
 from .components import NeuModule, Neuron, Projection
+from .components._modules import SemiFoldedStreamAttr, _SemiFoldedModule
 from .components.modules import BuiltComponentType
+from .exceptions import NotSupportedError
 from .mixin import Container
 from .node import NodeDict, NodeList
 
diff --git a/tests/onboard/README.md b/tests/onboard/README.md
index 2d1f0104..49ccf3e5 100644
--- a/tests/onboard/README.md
+++ b/tests/onboard/README.md
@@ -2,8 +2,8 @@
 
 ## ANN权重映射
 
-|        测试项目        | 结果 |    备注    |
-| :--------------------: | :--: | :--------: |
+|         测试项目          | 结果 |    备注    |
+| :-----------------------: | :--: | :--------: |
 | [001 单层](#001-单层w8e1) |  ✅  |            |
 | [002 单层](#002-单层w8e4) |  ✅  |            |
 | [003 单层](#003-单层w2e2) |  ✅  |            |
@@ -124,8 +124,8 @@
 
 ## SNN算子
 
-|        测试项目        | 结果 | 备注 |
-| :--------------------: | :--: | :--: |
+|         测试项目          | 结果 | 备注 |
+| :-----------------------: | :--: | :--: |
 | [001 Conv1d](#001-conv1d) |  ✅  |      |
 
 ### Conv1d
@@ -145,8 +145,8 @@
 
 ## 半折叠算子
 
-|                     测试项目                     | 结果 |                 备注                 |
-| :-----------------------------------------------: | :--: | :----------------------------------: |
+|                      测试项目                       | 结果 |                 备注                 |
+| :-------------------------------------------------: | :--: | :----------------------------------: |
 |    [001 Conv2dSemiFolded](#001-conv2dsemifolded)    |  ✅  |                                      |
 |    [002 Conv2dSemiFolded](#002-conv2dsemifolded)    |  ✅  |                                      |
 |    [003 Conv2dSemiFolded](#003-conv2dsemifolded)    |  ✅  |                                      |

From 4cdb394e89c503fac88b6451fdc8741aac2d3eec Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 26 Nov 2024 21:30:35 +0800
Subject: [PATCH 143/187] =?UTF-8?q?=F0=9F=A4=96=20ci:=20rename=20workflow?=
 =?UTF-8?q?=20&=20add=20code=20coverage=20report=20for=20dev=20&=20master?=
 =?UTF-8?q?=20branches?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/codecov.yml   |  61 ++++++++++++++++++
 .github/workflows/pytest-ci.yml |  45 --------------
 README.md                       |   3 +
 poetry.lock                     | 107 +++++++++++++++++++++++++++++++-
 pyproject.toml                  |   2 +
 5 files changed, 172 insertions(+), 46 deletions(-)
 create mode 100644 .github/workflows/codecov.yml
 delete mode 100644 .github/workflows/pytest-ci.yml

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
new file mode 100644
index 00000000..05f9ce7b
--- /dev/null
+++ b/.github/workflows/codecov.yml
@@ -0,0 +1,61 @@
+name: Pytest & code coverage
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+      - dev
+    paths:
+      - "paibox/**"
+      - "tests/**"
+      - ".github/workflows/codecov.yml"
+      - "pyproject.toml"
+      - "poetry.lock"
+
+permissions:
+  contents: read
+
+jobs:
+  pytest:
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        os: [ubuntu-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install poetry
+        uses: abatilo/actions-poetry@v3
+
+      - name: Install test dependencies
+        run: |
+          poetry install --with test --sync
+
+      - name: Run pytest
+        run: |
+          poetry run pytest --cov-append --cov-report=xml --junitxml=junit.xml
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          fail_ci_if_error: true
+          flags: unittests
+          token: ${{ secrets.CODECOV_TOKEN }}
+
+      - name: Upload test results to Codecov
+        if: ${{ !cancelled() }}
+        uses: codecov/test-results-action@v1
+        with:
+          fail_ci_if_error: true
+          flags: unittests
+          token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/pytest-ci.yml b/.github/workflows/pytest-ci.yml
deleted file mode 100644
index 533bcf76..00000000
--- a/.github/workflows/pytest-ci.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: Python CI with pytest
-
-on:
-  pull_request:
-    branches:
-      - master
-      - dev
-    types: [opened, synchronize, reopened]
-
-permissions:
-  contents: read
-
-jobs:
-  pytest-ci:
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-        os: [ubuntu-latest, windows-latest]
-    runs-on: ${{ matrix.os }}
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install poetry
-        uses: abatilo/actions-poetry@v2
-
-      - name: Install test dependencies
-        run: |
-          poetry install --with test --sync
-
-      - name: Run pytest
-        uses: pavelzw/pytest-action@v2
-        with:
-          verbose: false
-          emoji: false
-          job-summary: true
-          custom-arguments: "-q"
-          custom-pytest: "poetry run pytest"
-          click-to-expand: true
-          report-title: "Test Report"
diff --git a/README.md b/README.md
index d4b2dbfa..10b29e47 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,9 @@
     <a href="https://results.pre-commit.ci/latest/github/PAICookers/PAIBox/master">
         <img alt="pre-commit.ci status" src="https://results.pre-commit.ci/badge/github/PAICookers/PAIBox/master.svg">
     </a>
+    <a href="https://codecov.io/gh/PAICookers/PAIBox" > 
+        <img src="https://codecov.io/gh/PAICookers/PAIBox/branch/master/graph/badge.svg?token=949SKVGRMC"/> 
+    </a>
 </p>
 
 👉 [用户使用指南](docs/Guide-of-PAIBox.md)
diff --git a/poetry.lock b/poetry.lock
index f87c950d..a0edf08e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -32,6 +32,88 @@ type = "legacy"
 url = "https://pypi.tuna.tsinghua.edu.cn/simple"
 reference = "tsinghua"
 
+[[package]]
+name = "coverage"
+version = "7.6.8"
+description = "Code coverage measurement for Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "coverage-7.6.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b39e6011cd06822eb964d038d5dff5da5d98652b81f5ecd439277b32361a3a50"},
+    {file = "coverage-7.6.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:63c19702db10ad79151a059d2d6336fe0c470f2e18d0d4d1a57f7f9713875dcf"},
+    {file = "coverage-7.6.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3985b9be361d8fb6b2d1adc9924d01dec575a1d7453a14cccd73225cb79243ee"},
+    {file = "coverage-7.6.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:644ec81edec0f4ad17d51c838a7d01e42811054543b76d4ba2c5d6af741ce2a6"},
+    {file = "coverage-7.6.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f188a2402f8359cf0c4b1fe89eea40dc13b52e7b4fd4812450da9fcd210181d"},
+    {file = "coverage-7.6.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e19122296822deafce89a0c5e8685704c067ae65d45e79718c92df7b3ec3d331"},
+    {file = "coverage-7.6.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:13618bed0c38acc418896005732e565b317aa9e98d855a0e9f211a7ffc2d6638"},
+    {file = "coverage-7.6.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:193e3bffca48ad74b8c764fb4492dd875038a2f9925530cb094db92bb5e47bed"},
+    {file = "coverage-7.6.8-cp310-cp310-win32.whl", hash = "sha256:3988665ee376abce49613701336544041f2117de7b7fbfe91b93d8ff8b151c8e"},
+    {file = "coverage-7.6.8-cp310-cp310-win_amd64.whl", hash = "sha256:f56f49b2553d7dd85fd86e029515a221e5c1f8cb3d9c38b470bc38bde7b8445a"},
+    {file = "coverage-7.6.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:86cffe9c6dfcfe22e28027069725c7f57f4b868a3f86e81d1c62462764dc46d4"},
+    {file = "coverage-7.6.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d82ab6816c3277dc962cfcdc85b1efa0e5f50fb2c449432deaf2398a2928ab94"},
+    {file = "coverage-7.6.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13690e923a3932e4fad4c0ebfb9cb5988e03d9dcb4c5150b5fcbf58fd8bddfc4"},
+    {file = "coverage-7.6.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be32da0c3827ac9132bb488d331cb32e8d9638dd41a0557c5569d57cf22c9c1"},
+    {file = "coverage-7.6.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44e6c85bbdc809383b509d732b06419fb4544dca29ebe18480379633623baafb"},
+    {file = "coverage-7.6.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:768939f7c4353c0fac2f7c37897e10b1414b571fd85dd9fc49e6a87e37a2e0d8"},
+    {file = "coverage-7.6.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e44961e36cb13c495806d4cac67640ac2866cb99044e210895b506c26ee63d3a"},
+    {file = "coverage-7.6.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ea8bb1ab9558374c0ab591783808511d135a833c3ca64a18ec927f20c4030f0"},
+    {file = "coverage-7.6.8-cp311-cp311-win32.whl", hash = "sha256:629a1ba2115dce8bf75a5cce9f2486ae483cb89c0145795603d6554bdc83e801"},
+    {file = "coverage-7.6.8-cp311-cp311-win_amd64.whl", hash = "sha256:fb9fc32399dca861584d96eccd6c980b69bbcd7c228d06fb74fe53e007aa8ef9"},
+    {file = "coverage-7.6.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e683e6ecc587643f8cde8f5da6768e9d165cd31edf39ee90ed7034f9ca0eefee"},
+    {file = "coverage-7.6.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1defe91d41ce1bd44b40fabf071e6a01a5aa14de4a31b986aa9dfd1b3e3e414a"},
+    {file = "coverage-7.6.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7ad66e8e50225ebf4236368cc43c37f59d5e6728f15f6e258c8639fa0dd8e6d"},
+    {file = "coverage-7.6.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fe47da3e4fda5f1abb5709c156eca207eacf8007304ce3019eb001e7a7204cb"},
+    {file = "coverage-7.6.8-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:202a2d645c5a46b84992f55b0a3affe4f0ba6b4c611abec32ee88358db4bb649"},
+    {file = "coverage-7.6.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4674f0daa1823c295845b6a740d98a840d7a1c11df00d1fd62614545c1583787"},
+    {file = "coverage-7.6.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:74610105ebd6f33d7c10f8907afed696e79c59e3043c5f20eaa3a46fddf33b4c"},
+    {file = "coverage-7.6.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37cda8712145917105e07aab96388ae76e787270ec04bcb9d5cc786d7cbb8443"},
+    {file = "coverage-7.6.8-cp312-cp312-win32.whl", hash = "sha256:9e89d5c8509fbd6c03d0dd1972925b22f50db0792ce06324ba069f10787429ad"},
+    {file = "coverage-7.6.8-cp312-cp312-win_amd64.whl", hash = "sha256:379c111d3558272a2cae3d8e57e6b6e6f4fe652905692d54bad5ea0ca37c5ad4"},
+    {file = "coverage-7.6.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b0c69f4f724c64dfbfe79f5dfb503b42fe6127b8d479b2677f2b227478db2eb"},
+    {file = "coverage-7.6.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c15b32a7aca8038ed7644f854bf17b663bc38e1671b5d6f43f9a2b2bd0c46f63"},
+    {file = "coverage-7.6.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63068a11171e4276f6ece913bde059e77c713b48c3a848814a6537f35afb8365"},
+    {file = "coverage-7.6.8-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f4548c5ead23ad13fb7a2c8ea541357474ec13c2b736feb02e19a3085fac002"},
+    {file = "coverage-7.6.8-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b4b4299dd0d2c67caaaf286d58aef5e75b125b95615dda4542561a5a566a1e3"},
+    {file = "coverage-7.6.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9ebfb2507751f7196995142f057d1324afdab56db1d9743aab7f50289abd022"},
+    {file = "coverage-7.6.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c1b4474beee02ede1eef86c25ad4600a424fe36cff01a6103cb4533c6bf0169e"},
+    {file = "coverage-7.6.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d9fd2547e6decdbf985d579cf3fc78e4c1d662b9b0ff7cc7862baaab71c9cc5b"},
+    {file = "coverage-7.6.8-cp313-cp313-win32.whl", hash = "sha256:8aae5aea53cbfe024919715eca696b1a3201886ce83790537d1c3668459c7146"},
+    {file = "coverage-7.6.8-cp313-cp313-win_amd64.whl", hash = "sha256:ae270e79f7e169ccfe23284ff5ea2d52a6f401dc01b337efb54b3783e2ce3f28"},
+    {file = "coverage-7.6.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:de38add67a0af869b0d79c525d3e4588ac1ffa92f39116dbe0ed9753f26eba7d"},
+    {file = "coverage-7.6.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b07c25d52b1c16ce5de088046cd2432b30f9ad5e224ff17c8f496d9cb7d1d451"},
+    {file = "coverage-7.6.8-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62a66ff235e4c2e37ed3b6104d8b478d767ff73838d1222132a7a026aa548764"},
+    {file = "coverage-7.6.8-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09b9f848b28081e7b975a3626e9081574a7b9196cde26604540582da60235fdf"},
+    {file = "coverage-7.6.8-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:093896e530c38c8e9c996901858ac63f3d4171268db2c9c8b373a228f459bbc5"},
+    {file = "coverage-7.6.8-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9a7b8ac36fd688c8361cbc7bf1cb5866977ece6e0b17c34aa0df58bda4fa18a4"},
+    {file = "coverage-7.6.8-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:38c51297b35b3ed91670e1e4efb702b790002e3245a28c76e627478aa3c10d83"},
+    {file = "coverage-7.6.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2e4e0f60cb4bd7396108823548e82fdab72d4d8a65e58e2c19bbbc2f1e2bfa4b"},
+    {file = "coverage-7.6.8-cp313-cp313t-win32.whl", hash = "sha256:6535d996f6537ecb298b4e287a855f37deaf64ff007162ec0afb9ab8ba3b8b71"},
+    {file = "coverage-7.6.8-cp313-cp313t-win_amd64.whl", hash = "sha256:c79c0685f142ca53256722a384540832420dff4ab15fec1863d7e5bc8691bdcc"},
+    {file = "coverage-7.6.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ac47fa29d8d41059ea3df65bd3ade92f97ee4910ed638e87075b8e8ce69599e"},
+    {file = "coverage-7.6.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:24eda3a24a38157eee639ca9afe45eefa8d2420d49468819ac5f88b10de84f4c"},
+    {file = "coverage-7.6.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4c81ed2820b9023a9a90717020315e63b17b18c274a332e3b6437d7ff70abe0"},
+    {file = "coverage-7.6.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd55f8fc8fa494958772a2a7302b0354ab16e0b9272b3c3d83cdb5bec5bd1779"},
+    {file = "coverage-7.6.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f39e2f3530ed1626c66e7493be7a8423b023ca852aacdc91fb30162c350d2a92"},
+    {file = "coverage-7.6.8-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:716a78a342679cd1177bc8c2fe957e0ab91405bd43a17094324845200b2fddf4"},
+    {file = "coverage-7.6.8-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:177f01eeaa3aee4a5ffb0d1439c5952b53d5010f86e9d2667963e632e30082cc"},
+    {file = "coverage-7.6.8-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:912e95017ff51dc3d7b6e2be158dedc889d9a5cc3382445589ce554f1a34c0ea"},
+    {file = "coverage-7.6.8-cp39-cp39-win32.whl", hash = "sha256:4db3ed6a907b555e57cc2e6f14dc3a4c2458cdad8919e40b5357ab9b6db6c43e"},
+    {file = "coverage-7.6.8-cp39-cp39-win_amd64.whl", hash = "sha256:428ac484592f780e8cd7b6b14eb568f7c85460c92e2a37cb0c0e5186e1a0d076"},
+    {file = "coverage-7.6.8-pp39.pp310-none-any.whl", hash = "sha256:5c52a036535d12590c32c49209e79cabaad9f9ad8aa4cbd875b68c4d67a9cbce"},
+    {file = "coverage-7.6.8.tar.gz", hash = "sha256:8b2b8503edb06822c86d82fa64a4a5cb0760bb8f31f26e138ec743f422f37cfc"},
+]
+
+[package.dependencies]
+tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
+
+[package.extras]
+toml = ["tomli"]
+
+[package.source]
+type = "legacy"
+url = "https://pypi.tuna.tsinghua.edu.cn/simple"
+reference = "tsinghua"
+
 [[package]]
 name = "exceptiongroup"
 version = "1.2.2"
@@ -405,6 +487,29 @@ type = "legacy"
 url = "https://pypi.tuna.tsinghua.edu.cn/simple"
 reference = "tsinghua"
 
+[[package]]
+name = "pytest-cov"
+version = "6.0.0"
+description = "Pytest plugin for measuring coverage."
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0"},
+    {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"},
+]
+
+[package.dependencies]
+coverage = {version = ">=7.5", extras = ["toml"]}
+pytest = ">=4.6"
+
+[package.extras]
+testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"]
+
+[package.source]
+type = "legacy"
+url = "https://pypi.tuna.tsinghua.edu.cn/simple"
+reference = "tsinghua"
+
 [[package]]
 name = "pytest-md"
 version = "0.2.0"
@@ -459,4 +564,4 @@ reference = "tsinghua"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "25cfd043004050d36aa3b7c7bede80ae551fda620af4c9d8600ffde29b8f8c61"
+content-hash = "b5d36748eeaeb04ded544fd4992e7a16a4b33efbd4099390b93965543e502631"
diff --git a/pyproject.toml b/pyproject.toml
index 2a0bd1bf..71124dab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,7 @@ optional = true
 [tool.poetry.group.test.dependencies]
 pytest = "^8.0.0"
 pytest-md = "^0.2.0"
+pytest-cov = "^6.0.0"
 paicorelib = {git = "https://github.com/PAICookers/PAIlib.git", rev = "dev"}
 orjson = "^3.10.0"
 
@@ -59,6 +60,7 @@ orjson = "^3.10.0"
 [tool.pytest.ini_options]
 minversion = "8.0.0"
 testpaths = ["tests"]
+addopts = "--cov=paibox --cov-report=term"
 
 
 [[tool.poetry.source]]

From e0fbae794380a3b7a8766c490766e8862a7f29fa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 26 Nov 2024 13:31:03 +0000
Subject: [PATCH 144/187] :rotating_light: auto fix by pre-commit hooks

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 10b29e47..cd6e6ae5 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,8 @@
     <a href="https://results.pre-commit.ci/latest/github/PAICookers/PAIBox/master">
         <img alt="pre-commit.ci status" src="https://results.pre-commit.ci/badge/github/PAICookers/PAIBox/master.svg">
     </a>
-    <a href="https://codecov.io/gh/PAICookers/PAIBox" > 
-        <img src="https://codecov.io/gh/PAICookers/PAIBox/branch/master/graph/badge.svg?token=949SKVGRMC"/> 
+    <a href="https://codecov.io/gh/PAICookers/PAIBox" >
+        <img src="https://codecov.io/gh/PAICookers/PAIBox/branch/master/graph/badge.svg?token=949SKVGRMC"/>
     </a>
 </p>
 

From 52c523119b94c89dd5942d11ec65640ab40331f4 Mon Sep 17 00:00:00 2001
From: birdswimming <72957950+birdswimming@users.noreply.github.com>
Date: Fri, 1 Nov 2024 14:39:06 +0800
Subject: [PATCH 145/187] bugfix(backend): fix some bugs in axons set  (#136)

* fix bug in coreblock axons

* add test for ordered axons

* :rotating_light: auto fix by pre-commit hooks

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 paibox/backend/graphs.py     |  74 +++++++++++++++++
 paibox/backend/mapper.py     |  67 ++++++++++++---
 paibox/backend/placement.py  |  33 +++++---
 paibox/backend/routing.py    | 157 +++++++++++++++++++++++++++--------
 paibox/backend/types.py      |  18 ++++
 tests/backend/conftest.py    |  66 +++++++++++++++
 tests/backend/test_mapper.py |  42 ++++++++--
 7 files changed, 392 insertions(+), 65 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 335b95cf..c3e2751d 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -557,6 +557,80 @@ def _degree_check(
                 )
 
 
+def find_cycles(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[list[_NT]]:
+    """Collect the cycles met by a depth-first walk over `directed_edges`.
+
+    Each reported cycle is the slice of the current DFS path that starts at
+    the node being re-entered. Every node is expanded at most once, so the
+    result depends on the iteration order of `directed_edges`.
+    """
+    found: list[list[_NT]] = []
+    done: set[_NT] = set()
+    path: list[_NT] = []
+    on_path: set[_NT] = set()  # O(1) membership test for `path`
+
+    def walk(cur: _NT) -> None:
+        if cur in on_path:
+            # Back edge: the path from the first `cur` onwards is a cycle.
+            found.append(path[path.index(cur) :])
+        elif cur not in done:
+            done.add(cur)
+            path.append(cur)
+            on_path.add(cur)
+            for nxt in directed_edges.get(cur, []):
+                walk(nxt)
+            path.pop()
+            on_path.remove(cur)
+
+    # Start a walk from every node that has not been expanded yet.
+    for start in directed_edges:
+        if start not in done:
+            walk(start)
+
+    return found
+
+
+def merge_overlap(groups: Iterable[Iterable[_NT]]) -> list[list[_NT]]:
+    """Merge the groups that share at least one element (union-find).
+
+    Args:
+        groups: groups of hashable nodes. Any iterables are accepted, even
+            one-shot iterators; empty groups are ignored.
+
+    Returns:
+        The merged groups, each holding its elements in first-seen order.
+    """
+    # Materialize first: the input is walked twice and indexed below, which
+    # would break on generators, sets or empty groups.
+    members: list[list[_NT]] = [list(g) for g in groups]
+    parent: dict[_NT, _NT] = {}
+
+    def find(x: _NT) -> _NT:
+        # Iterative lookup with path halving; no recursion-depth limit.
+        while parent[x] != x:
+            parent[x] = parent[parent[x]]
+            x = parent[x]
+        return x
+
+    # Every element starts as the root of its own singleton set.
+    for group in members:
+        for element in group:
+            parent.setdefault(element, element)
+
+    # Link every element of a group to the root of the group's first element.
+    for group in members:
+        for element in group[1:]:
+            root_x, root_y = find(group[0]), find(element)
+            if root_x != root_y:
+                parent[root_y] = root_x
+
+    merged: dict[_NT, list[_NT]] = {}
+    for element in parent:
+        merged.setdefault(find(element), []).append(element)
+
+    return list(merged.values())
+
+
 def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
     """
     Topological sort algorithm by Kahn [1]_.
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 045e33d5..6012d7fc 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -22,10 +22,24 @@
     OutputDestConf,
 )
 from .context import _BACKEND_CONTEXT, set_cflag
-from .graphs import PAIGraph, get_node_degrees, get_succ_cb_by_node, toposort
+from .graphs import (
+    PAIGraph,
+    find_cycles,
+    get_node_degrees,
+    get_succ_cb_by_node,
+    merge_overlap,
+    toposort,
+)
 from .placement import CoreBlock, aligned_coords, max_lcn_of_cb
 from .routing import RoutingGroup, RoutingManager
-from .types import NeuSegment, NodeDegree, NodeType, SourceNodeType, is_iw8
+from .types import (
+    MergedSuccGroup,
+    NeuSegment,
+    NodeDegree,
+    NodeType,
+    SourceNodeType,
+    is_iw8,
+)
 
 __all__ = ["Mapper"]
 
@@ -202,10 +216,19 @@ def untwist_branch_nodes(self) -> None:
 
     def build_core_blocks(self) -> None:
         """Build core blocks based on partitioned edges."""
-        merged_sgrps = self.graph.graph_partition()
+        merged_sgrps: list[MergedSuccGroup] = self.graph.graph_partition()
+        merged_sgrps: list[MergedSuccGroup] = cycle_merge(merged_sgrps)
 
         for msgrp in merged_sgrps:
-            self.routing_groups.append(RoutingGroup.build(msgrp))
+            self.routing_groups.append(RoutingGroup.build(msgrp, True))
+
+        routing_groups: list[RoutingGroup] = list()
+        for rg in self.routing_groups:
+            routing_groups.extend(rg.optimize_group())
+        self.routing_groups = routing_groups
+
+        for rg in self.routing_groups:
+            rg.dump()
 
         for rg in self.routing_groups:
             self.core_blocks += rg.core_blocks
@@ -214,7 +237,7 @@ def build_core_blocks(self) -> None:
             succ_cbs: list[CoreBlock] = []
             # cur_cb == cb is possible
             for cb in self.core_blocks:
-                if any(d for d in cur_cb.dest if d in cb.source):
+                if any(d for d in cur_cb.dest if d in cb.ordered_axons):
                     succ_cbs.append(cb)
 
             self.succ_core_blocks[cur_cb] = succ_cbs
@@ -274,8 +297,8 @@ def lcn_ex_adjustment(self) -> None:
 
     def cb_axon_grouping(self) -> None:
         """The axons are grouped after the LCN has been modified & locked."""
-        for rg in self.routing_groups:
-            rg.group_axons()
+        for core_block in self.core_blocks:
+            core_block.group_axons()
 
     def graph_optimization(self) -> None:
         optimized = self.graph.graph_optimization(self.core_blocks, self.routing_groups)
@@ -416,7 +439,7 @@ def _inpproj_config_export(self) -> InputNodeConf:
             # LCN of `input_cbs` are the same.
             input_cb = input_cbs[0]
             axon_coords = aligned_coords(
-                slice(0, input_cb.n_axon_of(input_cb.source.index(inode)), 1),
+                slice(0, input_cb.n_axon_of(input_cb.ordered_axons.index(inode)), 1),
                 input_cb.axon_segments[inode],
                 1,
                 input_cb.n_timeslot,
@@ -646,7 +669,7 @@ def find_axon(self, neuron: Neuron, *, verbose: int = 0) -> None:
 
         for cb in self.core_blocks:
             # Find neuron in one or more core blocks.
-            if neuron in cb.source:
+            if neuron in cb.ordered_axons:
                 print(f"axons {neuron.name} placed in {cb.name}, LCN_{1 << cb.lcn_ex}X")
                 axon_segment = cb.axon_segments[neuron]
                 print(
@@ -663,11 +686,35 @@ def _find_dest_cb_by_nseg(
         self, neu_seg: NeuSegment, cb: CoreBlock
     ) -> list[CoreBlock]:
         succ_cbs = self.succ_core_blocks[cb]
-        dest_cb_of_nseg = [cb for cb in succ_cbs if neu_seg.target in cb.source]
+        dest_cb_of_nseg = [cb for cb in succ_cbs if neu_seg.target in cb.ordered_axons]
 
         return dest_cb_of_nseg
 
 
+def cycle_merge(merged_sgrps: list[MergedSuccGroup]):
+    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = dict()
+    for msgrp in merged_sgrps:
+        succ_merged_sgrps[msgrp] = []
+        nodes = set(msgrp.nodes)
+        for _msgrp in merged_sgrps:
+            if msgrp == _msgrp:
+                continue
+            if not nodes.isdisjoint(_msgrp.input_nodes):
+                succ_merged_sgrps[msgrp].append(_msgrp)
+
+    cycles: list[list[MergedSuccGroup]] = find_cycles(succ_merged_sgrps)
+    merged_cycles: list[list[MergedSuccGroup]] = merge_overlap(cycles)
+
+    processed_merged_cycles: list[MergedSuccGroup] = list()
+    remaining_merged_sgrps: set[MergedSuccGroup] = set(merged_sgrps)
+    for merged_cycle in merged_cycles:
+        processed_merged_cycles.append(MergedSuccGroup.merge(merged_cycle))
+        for msgrp in merged_cycle:
+            remaining_merged_sgrps.remove(msgrp)
+    processed_merged_cycles.extend(remaining_merged_sgrps)
+    return processed_merged_cycles
+
+
 def group_by(dict_: dict, keyfunc=lambda item: item):
     """Groups the given list or dictionary by the value returned by ``keyfunc``."""
     d = defaultdict(list)
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 364a397d..e77e0035 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -93,7 +93,7 @@ def __init__(
         self._parents = parents
         self.rt_mode = mode
         self.seed = seed
-        self._lcn_ex = self._n_axon2lcn_ex()
+        self._lcn_ex = LCN_EX.LCN_1X
 
         self.target_lcn = LCN_EX.LCN_1X
         self._lcn_locked = False
@@ -102,7 +102,7 @@ def __init__(
         self.core_placements = dict()
         self.axon_segments = dict()
         self.neuron_segs_of_cb = []
-        self.ordered_axons: list[SourceNodeType] = []
+        self._ordered_axons: list[SourceNodeType] = []
         """Axons in private + multicast order."""
 
     def group_neurons(
@@ -172,7 +172,7 @@ def obj(self) -> tuple[FullConnectedSyn, ...]:
 
     @property
     def shape(self) -> tuple[int, int]:
-        return (len(self.source), len(self.dest))
+        return (len(self.ordered_axons), len(self.dest))
 
     @property
     def source(self) -> list[SourceNodeType]:
@@ -190,7 +190,7 @@ def dest(self) -> list[DestNodeType]:
 
     def n_axon_of(self, index: int) -> int:
         """Get the #N of axons of `index`-th source neuron."""
-        return self.axons[index].num_out
+        return self.ordered_axons[index].num_out
 
     """Boundary limitations"""
 
@@ -275,7 +275,7 @@ def pool_max(self) -> MaxPoolingEnable:
 
     @property
     def n_axon(self) -> int:
-        return sum(s.num_out for s in self.axons)
+        return sum(s.num_out for s in self.ordered_axons)
 
     @property
     def n_fanout(self) -> int:
@@ -307,17 +307,21 @@ def n_neuron_of_plm(self) -> list[int]:
             for neuron_segs in self.neuron_segs_of_cb
         ]
 
-    def group_axons(self, multicast_axons: list[SourceNodeType] = []) -> None:
+    @property
+    def ordered_axons(self) -> list[SourceNodeType]:
+        return self._ordered_axons
+
+    @ordered_axons.setter
+    def ordered_axons(self, axons: list[SourceNodeType]):
+        self._ordered_axons = axons
+        self._lcn_ex = self._n_axon2lcn_ex()
+
+    def group_axons(self) -> None:
         """Group the axons, including the private & the multicast parts.
 
         NOTE: Take the union of the private axons & the multicast axons, but sort the multicast axons first, then the \
             axons that are in the private part and not in the multicast part.
         """
-        if not self._lcn_locked:
-            raise GraphBuildError("group axons after 'lcn_ex' is locked.")
-
-        axons = multicast_axons + [ax for ax in self.axons if ax not in multicast_axons]
-        self.ordered_axons = axons
         self.axon_segments = get_axon_segments(
             self.ordered_axons, self.n_timeslot, self.n_fanin_base
         )
@@ -435,6 +439,13 @@ def export_core_plm_config(cls, cb: "CoreBlock") -> CoreConfInChip:
 
         return cb_config
 
+    def dump(self, i: int = 0) -> None:
+        tabs = "\t" * i
+        print(f"{tabs}{self.name} with {self.n_core_required} cores:")
+        print(f"{tabs}\tLCN: {self.lcn_ex}")
+        for edge in self._parents:
+            print(f"{tabs}\t{edge.name}: {edge.source.name} -> {edge.target.name}")
+
 
 class CorePlacement(CoreAbstract):
     parent: CoreBlock
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 4863b112..9154d185 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -51,12 +51,15 @@ class RoutingGroup:
     """Class counter for debugging."""
 
     def __init__(
-        self, unordered_cb: list[CoreBlock], ordered_rgrp: list["RoutingGroup"]
+        self,
+        unordered_elems: list[Union[CoreBlock, "RoutingGroup"]],
+        ordered_elems: list["RoutingGroup"],
+        is_root: bool = False,
     ) -> None:
-        self.unordered_cb: list[CoreBlock] = unordered_cb
-        self.ordered_rgrp: list["RoutingGroup"] = ordered_rgrp
+        self.unordered_elems: list[Union[CoreBlock, "RoutingGroup"]] = unordered_elems
+        self.ordered_elems: list["RoutingGroup"] = ordered_elems
         self.routing_elems: list[Union[CoreBlock, "RoutingGroup"]] = (
-            unordered_cb + ordered_rgrp
+            unordered_elems + ordered_elems
         )
         self.offset: list[int] = []  # TODO Change a name
         self.n_core_required: int = 0
@@ -70,6 +73,11 @@ def __init__(
 
         self.axons: list[SourceNodeType] = list(axons)  # unordered
 
+        dest: set[DestNodeType] = set()
+        for elem in self.routing_elems:
+            dest.update(elem.dest)
+        self.dest: list[DestNodeType] = list(dest)
+
         self.assigned_coords: list[Coord] = []
         """Assigned core coordinates in the routing group"""
         self.wasted_coords: list[Coord] = []
@@ -77,17 +85,56 @@ def __init__(
         self.wasted_core_plm: dict[Coord, EmptyCorePlacement] = {}
         """Wasted core placements"""
 
+        # can not use set here, order matters
+        self.global_axons: list[SourceNodeType] = []
+        """multicast axons inheritted from the parent routing group"""
+        self.private_axons: list[SourceNodeType] = []
+        """multicast axons only effective in the current routing group"""
+
         """Status options"""
         self.is_assigned = False
         """Whether the coordinates of chip & cores are assigned."""
+        self.is_root = is_root
 
         # For debugging
         self._id = RoutingGroup._debug_id
         RoutingGroup._debug_id += 1
 
+        if is_root:
+            self.set_axons()
+
+    def set_axons(self, multicast_axons: list[SourceNodeType] = []) -> None:
+        """Set the multicast axons for the routing group."""
+        self.global_axons = multicast_axons
+        ax_shared_times: list[int] = [0] * len(self.axons)
+
+        used_axons: set[SourceNodeType] = set()
+        for elem in self.routing_elems:
+            # all axon of coreblocks should be multicast to the whole routing group
+            # because this routing group is the only coord that can access the coreblocks
+            if isinstance(elem, CoreBlock):
+                for axon in elem.axons:
+                    if axon not in self.global_axons and axon not in self.private_axons:
+                        self.private_axons.append(axon)
+            else:
+                for axon in elem.axons:
+                    if axon not in self.global_axons and axon not in self.private_axons:
+                        if axon in used_axons:
+                            self.private_axons.append(axon)
+                        else:
+                            used_axons.add(axon)
+
+        for elem in self.routing_elems:
+            if isinstance(elem, RoutingGroup):
+                elem.set_axons(self.global_axons + self.private_axons)
+            else:
+                # coreblocks in the routing group should reserve space for
+                # all axons that multicast to the routing group
+                elem.ordered_axons = self.global_axons + self.private_axons
+
     def set_core_required(self) -> None:
         """Calculate the number of cores required for the routing group iteratively."""
-        for rgrp in self.ordered_rgrp:
+        for rgrp in self.ordered_elems:
             rgrp.set_core_required()
 
         # Record the used cores of the members, but not the actual amount.
@@ -95,14 +142,14 @@ def set_core_required(self) -> None:
 
         # Unordered core blocks sorted in descending order, avoiding assigning waste.
         unordered_cb = sorted(
-            self.unordered_cb, key=lambda x: x.n_core_required, reverse=True
+            self.unordered_elems, key=lambda x: x.n_core_required, reverse=True
         )
         for cb in unordered_cb:
             self.offset.append(self.n_core_required)
             n_core_used += cb.n_core_required
 
         # Ordered routing groups should be assgined first.
-        ordered_rgrp = self.ordered_rgrp
+        ordered_rgrp = self.ordered_elems
         for rgrp in ordered_rgrp:
             n_core_assigned = _nearest_multiple_above(n_core_used, rgrp.n_core_required)
             self.offset.append(n_core_assigned)
@@ -154,28 +201,57 @@ def assign_coord(
 
         return self.assigned_coords, self.wasted_coords
 
-    def group_axons(self, multicast_axons: list[SourceNodeType] = []) -> None:
-        """Group the axons, using list to keep the order of axons."""
-        if not all(cb._lcn_locked for cb in self.core_blocks):
-            raise GraphBuildError(
-                "get axon segments of core block after 'lcn_ex' is locked."
+    def optimize_group(self) -> list["RoutingGroup"]:
+        optimized_unordered: list[Union[CoreBlock, "RoutingGroup"]] = list()
+        optimized_ordered: list["RoutingGroup"] = list()
+        for elem in self.unordered_elems:
+            if isinstance(elem, RoutingGroup):
+                optimized_unordered += elem.optimize_group()
+            else:
+                optimized_unordered.append(elem)
+        for elem in self.ordered_elems:
+            optimized_ordered += elem.optimize_group()
+
+        # If one sub routing group in elems does not use
+        # the private multicast axons, then make it independent.
+
+        # coreblocks in the routing group always use the private multicast axons
+        # otherwise, this coreblock should not in the routing group
+        unordered_groups: list["RoutingGroup"] = list()
+        remaining_unordered: list[Union[CoreBlock, "RoutingGroup"]] = list()
+        for elem in optimized_unordered:
+            if isinstance(elem, CoreBlock):
+                remaining_unordered.append(elem)
+            elif not set(self.private_axons).isdisjoint(elem.axons):
+                remaining_unordered.append(elem)
+            else:
+                unordered_groups.append(elem)
+
+        ordered_groups: list["RoutingGroup"] = list()
+        remaining_ordered: list["RoutingGroup"] = list()
+        inputs: set[DestNodeType] = set()
+        for elem in reversed(optimized_ordered):
+            if not set(self.private_axons).isdisjoint(elem.axons):
+                inputs.update(elem.axons)
+                remaining_ordered.insert(0, elem)
+            elif not inputs.isdisjoint(elem.dest):
+                inputs.update(elem.dest)
+                remaining_ordered.insert(0, elem)
+            else:
+                elem.global_axons = self.global_axons
+                elem.is_root = self.is_root
+                ordered_groups.insert(0, elem)
+
+        optimized_groups: list["RoutingGroup"] = list()
+        if len(remaining_unordered) > 0:
+            optimized_groups.append(
+                RoutingGroup(remaining_unordered, remaining_ordered, self.is_root)
             )
 
-        private_multicast_axons = multicast_axons.copy()
-        ax_shared_times: list[int] = [0] * len(self.axons)
-
-        # Axons shared within a routing group also need to be multicast.
-        for elem in self.routing_elems:
-            for ax in elem.axons:
-                idx = self.axons.index(ax)
-                ax_shared_times[idx] += 1
+        # can not change the order here
+        optimized_groups = unordered_groups + optimized_groups + ordered_groups
 
-        for ax, times in zip(self.axons, ax_shared_times):
-            if times > 1 and ax not in private_multicast_axons:
-                private_multicast_axons.append(ax)
-
-        for elem in self.routing_elems:
-            elem.group_axons(private_multicast_axons)
+        return optimized_groups
 
     @property
     def core_blocks(self) -> list[CoreBlock]:
@@ -191,17 +267,26 @@ def core_blocks(self) -> list[CoreBlock]:
         return cbs
 
     @classmethod
-    def build(cls, merged_sgrp: MergedSuccGroup) -> "RoutingGroup":
+    def build(
+        cls, merged_sgrp: MergedSuccGroup, is_root: bool = False
+    ) -> "RoutingGroup":
         msgrp = MergedSuccGroup()
         remaining = MergedSuccGroup()
-
+        sub_nodes = set()
+        remaining_nodes = set()
         for group in merged_sgrp.groups:
             if group.input in merged_sgrp.nodes:
+                sub_nodes.update(group.nodes)
+        remaining_nodes = merged_sgrp.nodes - sub_nodes
+
+        for group in merged_sgrp.groups:
+            if not sub_nodes.isdisjoint(group.nodes):
                 msgrp.add_group(group)
-            else:
+            if not remaining_nodes.isdisjoint(group.nodes):
                 remaining.add_group(group)
 
-        remaining.nodes -= msgrp.nodes
+        remaining.nodes &= remaining_nodes
+        msgrp.nodes &= sub_nodes
         unordered_cb = CoreBlock.build_core_blocks(remaining)
 
         if len(msgrp.nodes) > 0:
@@ -210,7 +295,7 @@ def build(cls, merged_sgrp: MergedSuccGroup) -> "RoutingGroup":
         else:
             ordered_rgrp = []
 
-        return cls(unordered_cb, ordered_rgrp)
+        return cls(unordered_cb, ordered_rgrp, is_root)
 
     def core_block_alloc(self) -> None:
         assert self.is_assigned, "coordinates are not assigned."
@@ -245,15 +330,15 @@ def chip_coord(self) -> ChipCoord:
     def dump(self, i: int = 0) -> None:
         tabs = "\t" * i
         print(f"{tabs}RoutingGroup: {self} with {self.n_core_required} cores:")
+        print(
+            f"{tabs}multicast axons: {[axon.name for axon in self.global_axons + self.private_axons]}"
+        )
         for elem in self.routing_elems:
             if isinstance(elem, RoutingGroup):
                 elem.dump(i + 1)
             else:
-                print(f"{tabs}\t{elem.name} with {elem.n_core_required} cores:")
-                for edge in elem._parents:
-                    print(
-                        f"{tabs}\t\t{edge.name}: {edge.source.name} -> {edge.target.name}"
-                    )
+                elem.dump(i + 1)
+        print()
 
     def __contains__(self, cb: CoreBlock) -> bool:
         return cb in self.core_blocks
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index dacf9dcd..778e3853 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -129,6 +129,7 @@ class MergedSuccGroup:
     def __init__(self, *init_sgrp: SuccGroup) -> None:
         self.nodes: set[NodeType] = set()
         self.groups: list[SuccGroup] = list()
+        self.input_nodes: list[NodeType] = list()
 
         if init_sgrp:
             for sgrp in init_sgrp:
@@ -137,6 +138,7 @@ def __init__(self, *init_sgrp: SuccGroup) -> None:
     def add_group(self, group: SuccGroup) -> None:
         self.groups.append(group)
         self.nodes.update(group.nodes)
+        self.input_nodes.append(group.input)
 
     @property
     def outputs(self) -> dict[NodeType, list[EdgeType]]:
@@ -148,6 +150,22 @@ def outputs(self) -> dict[NodeType, list[EdgeType]]:
 
         return onodes
 
+    @property
+    def num_in(self) -> int:
+        return sum(input_node.num_out for input_node in self.input_nodes)
+
+    @classmethod
+    def merge(cls, merged_sgrps: list["MergedSuccGroup"]) -> "MergedSuccGroup":
+        merged = cls()
+        for merged_sgrp in merged_sgrps:
+            merged.nodes.update(merged_sgrp.nodes)
+            merged.groups.extend(merged_sgrp.groups)
+            merged.input_nodes.extend(merged_sgrp.input_nodes)
+        return merged
+
+    def __hash__(self) -> int:
+        return hash(tuple(self.nodes))
+
     def dump(self) -> None:
         print("MergedSuccGroup:")
         for group in self.groups:
diff --git a/tests/backend/conftest.py b/tests/backend/conftest.py
index ad7e94a2..fc7b18c7 100644
--- a/tests/backend/conftest.py
+++ b/tests/backend/conftest.py
@@ -127,6 +127,62 @@ def __init__(self, large_scale: bool = False):
         )
 
 
+class NetForTest5(pb.Network):
+    def __init__(self):
+        super().__init__()
+        self.n1 = pb.InputProj(input=None, shape_out=(400,), name="n1")
+        self.n2 = pb.TonicSpiking(400, 3, name="n2")
+        self.n3 = pb.TonicSpiking(400, 3, name="n3")
+        self.n4 = pb.TonicSpiking(400, 3, name="n4")
+        self.n5 = pb.TonicSpiking(800, 3, name="n5")
+        self.n6 = pb.TonicSpiking(400, 4, name="n6")
+        self.s0 = pb.FullConn(
+            self.n1, self.n2, conn_type=pb.SynConnType.All2All, name="s0"
+        )
+        self.s1 = pb.FullConn(
+            self.n2, self.n3, conn_type=pb.SynConnType.All2All, name="s1"
+        )
+        self.s2 = pb.FullConn(
+            self.n3, self.n4, conn_type=pb.SynConnType.All2All, name="s2"
+        )
+        self.s3 = pb.FullConn(
+            self.n4, self.n5, conn_type=pb.SynConnType.All2All, name="s3"
+        )
+        self.s4 = pb.FullConn(
+            self.n5, self.n6, conn_type=pb.SynConnType.All2All, name="s4"
+        )
+        self.s5 = pb.FullConn(
+            self.n1, self.n6, conn_type=pb.SynConnType.All2All, name="s5"
+        )
+        self.s6 = pb.FullConn(
+            self.n2, self.n5, conn_type=pb.SynConnType.All2All, name="s6"
+        )
+
+
+class NetForTest6(pb.Network):
+    def __init__(self):
+        super().__init__()
+        self.n1 = pb.InputProj(input=None, shape_out=(400,), name="n1")
+        self.n2 = pb.InputProj(input=None, shape_out=(400,), name="n2")
+        self.n3 = pb.TonicSpiking(400, 3, name="n3")
+        self.n4 = pb.TonicSpiking(400, 3, name="n4")
+        self.s0 = pb.FullConn(
+            self.n1, self.n3, conn_type=pb.SynConnType.All2All, name="s0"
+        )
+        self.s1 = pb.FullConn(
+            self.n1, self.n4, conn_type=pb.SynConnType.All2All, name="s1"
+        )
+        self.s2 = pb.FullConn(
+            self.n2, self.n3, conn_type=pb.SynConnType.All2All, name="s2"
+        )
+        self.s3 = pb.FullConn(
+            self.n2, self.n4, conn_type=pb.SynConnType.All2All, name="s3"
+        )
+        self.s4 = pb.FullConn(
+            self.n3, self.n4, conn_type=pb.SynConnType.All2All, name="s4"
+        )
+
+
 class Network_with_multi_inodes1(pb.Network):
     """Test the following situations with multiple input nodes:
         1. Two input nodes with their own core blocks.
@@ -682,6 +738,16 @@ def build_example_net4():
     return NetForTest4()
 
 
+@pytest.fixture(scope="class")
+def build_example_net5():
+    return NetForTest5()
+
+
+@pytest.fixture(scope="class")
+def build_example_net6():
+    return NetForTest6()
+
+
 @pytest.fixture(scope="class")
 def build_example_net4_large_scale():
     return NetForTest4(large_scale=True)
diff --git a/tests/backend/test_mapper.py b/tests/backend/test_mapper.py
index 9bd0022b..98db5144 100644
--- a/tests/backend/test_mapper.py
+++ b/tests/backend/test_mapper.py
@@ -375,15 +375,18 @@ def test_grouping_optim_core(self, monkeypatch, build_example_net4):
         mapper.build(net)
         mapper.compile(grouping_optim_target="core")
 
-        assert mapper.core_blocks[0].n_core_required == ceil(
-            net.n1.num_out / HwConfig.N_DENDRITE_MAX_SNN
-        )
-
-        assert mapper.core_blocks[1].n_core_required == 1 + 1
+        for cb in mapper.core_blocks:
+            if net.n1 in cb.dest:
+                assert cb.n_core_required == ceil(
+                    net.n1.num_out / HwConfig.N_DENDRITE_MAX_SNN
+                )
+            elif net.n2 in cb.dest:
+                assert cb.n_core_required == 1 + 1
 
-        assert mapper.core_blocks[2].n_core_required == ceil(
-            net.n4.num_out / HwConfig.N_DENDRITE_MAX_SNN
-        )
+            elif net.n4 in cb.dest:
+                assert cb.n_core_required == ceil(
+                    net.n4.num_out / HwConfig.N_DENDRITE_MAX_SNN
+                )
 
     def test_grouping_optim_both(self, monkeypatch, build_example_net4):
         net = build_example_net4
@@ -448,6 +451,29 @@ def __init__(self):
             multicast_optim=[net.n0],
         )
 
+    def test_ordered_axons(self, build_example_net5):
+        net = build_example_net5
+        mapper = pb.Mapper()
+        mapper.build(net)
+        mapper.compile()
+        nodes_with_empty_axons = [net.n3, net.n4, net.n5]
+        for cb in mapper.core_blocks:
+            if cb.dest[0] in nodes_with_empty_axons:
+                assert len(cb.ordered_axons) > len(cb.source)
+            else:
+                assert len(cb.ordered_axons) == len(cb.source)
+
+    def test_partition(self, build_example_net6):
+        net = build_example_net6
+        mapper = pb.Mapper()
+        mapper.build(net)
+        mapper.compile()
+        for cb in mapper.core_blocks:
+            if net.n3 in cb.dest:
+                assert len(cb.ordered_axons) == 2
+            if net.n4 in cb.dest:
+                assert len(cb.ordered_axons) == 3
+
     def test_core_estimate_only(self, build_example_net4):
         net = build_example_net4
 

From d9515d8fd65077029fac760f4fcb948e19b95513 Mon Sep 17 00:00:00 2001
From: yang1556 <92725391+yang1556@users.noreply.github.com>
Date: Fri, 29 Nov 2024 23:50:09 +0800
Subject: [PATCH 146/187] support rinbuffer

---
 paibox/components/_modules.py   |  5 +++--
 paibox/components/functional.py | 15 +++++++++------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index ea22fbe3..41da1bca 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -212,13 +212,14 @@ def _input_buffer_len_check(
                 math.ceil(in_channels * in_h * kw / HwConfig.N_FANIN_PER_DENDRITE_ANN)
             )
         )
-
-        if not kw * valid_interval > HwConfig.N_TIMESLOT_MAX / (2**E):
+        deep = min(in_h - kw, kw - 1) * valid_interval + 1
+        if not HwConfig.N_TIMESLOT_MAX / (2**E) > deep:
             raise ResourceError(
                 f"the input size of {self.name} is too large. Please adjust the input size or the number of channels."
             )
 
 
+
 class _LinearBase(FunctionalModule):
     def __init__(
         self,
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 423af93c..732dd2f0 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -910,8 +910,13 @@ def build(
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
         self.ostream_attr = incoming_stream_attr
+        twe = 1 + self.ostream_attr.t_last_vld
+
 
         ich, ih = self.source[0].shape_out
+
+        if build_options.get("check_before_compile"):
+            self._input_buffer_len_check(ich, ih, ih, incoming_stream_attr.interval)
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
@@ -932,7 +937,7 @@ def build(
                 shape=(ich, ih),
                 delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=self.tick_wait_end,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1049,7 +1054,6 @@ def build(
             ow,
         )
         twe = 1 + self.ostream_attr.t_last_vld
-
         if build_options.get("check_before_compile"):
             self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
@@ -1069,13 +1073,12 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
-
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
                 delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
@@ -1235,7 +1238,7 @@ def build(
                 (cin, ih),
                 delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1367,7 +1370,7 @@ def build(
                 (cin, ih),
                 delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )

From 1a8372e112c3807822a2daf20932ff65360b3e84 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:51:05 +0000
Subject: [PATCH 147/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/_modules.py   | 1 -
 paibox/components/functional.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 41da1bca..990b5a77 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -219,7 +219,6 @@ def _input_buffer_len_check(
             )
 
 
-
 class _LinearBase(FunctionalModule):
     def __init__(
         self,
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 732dd2f0..37aa6fc9 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -912,7 +912,6 @@ def build(
         self.ostream_attr = incoming_stream_attr
         twe = 1 + self.ostream_attr.t_last_vld
 
-
         ich, ih = self.source[0].shape_out
 
         if build_options.get("check_before_compile"):

From 3298c5512f21d3a59ef5546ec24e7bef63f55974 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 10:19:04 +0800
Subject: [PATCH 148/187] =?UTF-8?q?=E2=9C=85=20update=20the=20testcase=20`?=
 =?UTF-8?q?test=5Fgroup=5Fedges=5Fwith=5Fconstrs`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/backend/test_graphs.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tests/backend/test_graphs.py b/tests/backend/test_graphs.py
index 6d3de2f0..9491ee0b 100644
--- a/tests/backend/test_graphs.py
+++ b/tests/backend/test_graphs.py
@@ -467,14 +467,18 @@ def test_group_edges_with_constrs(
         # In this case, N2 & N3 should be together.
         pos_n2 = pos_n3 = 0
         for i, cb in enumerate(mapper.core_blocks):
-            _g_with_name = [e.name for e in cb._parents]
+            _g_with_name = [e.name for e in cb.obj]
             if "s2" in _g_with_name:
                 pos_n2 = i
+                break
+
+        for i, cb in enumerate(mapper.core_blocks):
+            _g_with_name = [e.name for e in cb.obj]
             if "s3" in _g_with_name:
                 pos_n3 = i
+                break
 
         assert pos_n2 == pos_n3
-        assert pos_n2 != 0
 
         # In this case, N2 & N3 should be split.
         monkeypatch.setattr(net.n2, "_tws", 2)
@@ -486,11 +490,16 @@ def test_group_edges_with_constrs(
 
         pos_n2 = pos_n3 = 0
         for i, part in enumerate(mapper.core_blocks):
-            _g_with_name = [e.name for e in part._parents]
+            _g_with_name = [e.name for e in part.obj]
             if "s2" in _g_with_name:
                 pos_n2 = i
+                break
+
+        for i, part in enumerate(mapper.core_blocks):
+            _g_with_name = [e.name for e in part.obj]
             if "s3" in _g_with_name:
                 pos_n3 = i
+                break
 
         assert pos_n2 != pos_n3
 

From 7608c4ded549468631834e6a0e3b1b02f491f0c2 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Thu, 28 Nov 2024 20:45:41 +0800
Subject: [PATCH 149/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix:=20fix=20the=20in?=
 =?UTF-8?q?put=20buffer=20limit=20check=20for=20semi-folded=20ops?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 990b5a77..30ca7397 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -201,21 +201,21 @@ def build(
         raise NotImplementedError
 
     def _input_buffer_len_check(
-        self, in_channels: int, in_h: int, kw: int, valid_interval: int
+        self, ich: int, ih: int, kw: int, interval: int
     ) -> None:
         """Check the limit of the semi-folded operators on the input buffer length of the core during the build phase.
 
-        NOTE: If the condition is not met, an expection will be raised in the subsequent compilation phase.
+        NOTE: The right side of the inequality will only be smaller in the backend. If the condition is not met, an \
+            expection will be raised in the subsequent compilation phase.
         """
         E = math.ceil(
-            math.log2(
-                math.ceil(in_channels * in_h * kw / HwConfig.N_FANIN_PER_DENDRITE_ANN)
-            )
+            math.log2(math.ceil(ich * ih * kw / HwConfig.N_FANIN_PER_DENDRITE_ANN))
         )
-        deep = min(in_h - kw, kw - 1) * valid_interval + 1
-        if not HwConfig.N_TIMESLOT_MAX / (2**E) > deep:
+
+        if min(ih - kw, kw - 1) * interval + 1 >= (HwConfig.N_TIMESLOT_MAX >> E):
+            _adjust_text = "input size, kernel size or stride along the data flow."
             raise ResourceError(
-                f"the input size of {self.name} is too large. Please adjust the input size or the number of channels."
+                f"the data arrangement of {self.name}'s input buffer may be wrong. Please adjust the {_adjust_text}."
             )
 
 

From e0c18d7d28bfa5e2daa724c4790b897105ed8fc2 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 14:16:04 +0800
Subject: [PATCH 150/187] =?UTF-8?q?=E2=9C=A8=20feat(base):=20use=20`DataSt?=
 =?UTF-8?q?reamFormat`=20to=20describe=20the=20format=20of=20dataflow.=20?=
 =?UTF-8?q?Update=20the=20update=20logic=20of=20dataflow=20format=20betwee?=
 =?UTF-8?q?n=20semi-folded=20ops.=20Labeling=20the=20dataflow=20format=20o?=
 =?UTF-8?q?n=20neurons.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/base.py                      |  51 ++++++++++++
 paibox/components/_modules.py       |  36 ++------
 paibox/components/functional.py     | 123 +++++++++++++++++-----------
 paibox/components/neuron/base.py    |  46 ++++++++++-
 paibox/network.py                   |  14 ++--
 tests/components/test_functional.py |  13 +--
 6 files changed, 189 insertions(+), 94 deletions(-)

diff --git a/paibox/base.py b/paibox/base.py
index 31e25387..89ffdbd0 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 import sys
 from typing import Any, ClassVar, Literal, Optional
 
@@ -256,6 +257,45 @@ def state(self) -> NodeDict:
         return self._memories
 
 
+INFINITE_DATAFLOW = 0  # the dataflow is infinite.
+
+
+@dataclass
+class DataFlowFormat:
+    """Describe in detail the format of valid data in the dataflow."""
+
+    t_1st_vld: int
+    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
+    interval: int = 1
+    """The interval of valid data in the flow."""
+    n_vld: int = INFINITE_DATAFLOW
+    """The number of valid data. 0 for infinite dataflow."""
+
+    def __post_init__(self) -> None:
+        if self.n_vld < INFINITE_DATAFLOW:
+            raise ValueError(
+                f"'n_vld' should be greater than or equal to {INFINITE_DATAFLOW}, "
+                f"but got {self.n_vld}."
+            )
+
+    def t_at_idx(self, idx: int) -> int:
+        """The time of the valid data at the given index."""
+        if self.n_vld > INFINITE_DATAFLOW:
+            assert 0 <= idx <= self.n_vld - 1
+
+        return self.t_1st_vld + idx * self.interval
+
+    def t_at_n(self, n: int) -> int:
+        """The time of the n-th valid data."""
+        return self.t_at_idx(n - 1)
+
+    @property
+    def t_last_vld(self) -> int:
+        """The time of the last valid data."""
+        assert self.n_vld > INFINITE_DATAFLOW
+        return self.t_at_n(self.n_vld)
+
+
 class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
 
     _delay: int
@@ -266,6 +306,9 @@ class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
     _uf: int
     """unrolling_factor"""
 
+    oflow_format: DataFlowFormat
+    """The format of output data stream"""
+
     def __init__(self, name: Optional[str] = None) -> None:
         super().__init__(name)
         self.master_nodes = NodeDict()
@@ -291,6 +334,14 @@ def tick_wait_end(self) -> int:
     def unrolling_factor(self) -> int:
         return self._uf
 
+    @property
+    def end_tick(self) -> int:
+        """End time of work."""
+        if self.tick_wait_end == 0:
+            return 9999  # Never end
+
+        return self.tick_wait_start + self.tick_wait_end - 1
+
     @unrolling_factor.setter
     def unrolling_factor(self, factor: int) -> None:
         self._uf = arg_check_pos(factor, "'unrolling_factor'")
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 30ca7397..a2a0953d 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,12 +1,10 @@
-import math
 import typing
-from dataclasses import dataclass
 from typing import Literal, Optional, Union
 
 import numpy as np
 from paicorelib import TM, HwConfig
 
-from paibox.base import NeuDyn, NodeList
+from paibox.base import DataFlowFormat, NeuDyn, NodeList
 from paibox.exceptions import ResourceError, ShapeError
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -58,7 +56,7 @@
     "_SpikingPool2dWithV",
     "_SemiFoldedModule",
     "_LinearBase",
-    "SemiFoldedStreamAttr",
+    "SemiFoldedDataFlowFormat",
 ]
 
 
@@ -161,41 +159,21 @@ class _DelayChainANN(_DelayChainBase):
     pass
 
 
-@dataclass(frozen=True)
-class SemiFoldedStreamAttr:
-    """Details of transmission of valid data in semi-folded form data stream."""
-
-    t_1st_vld: int
-    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
-    interval: int
-    """The interval of the output data stream."""
-    n_data: int = 0
-    """The number of valid output data."""
-
-    def t_at(self, n: int) -> int:
-        """The time of the n-th valid data."""
-        if self.n_data > 0:
-            assert 1 <= n <= self.n_data
-
-        return self.t_1st_vld + (n - 1) * self.interval
-
-    @property
-    def t_last_vld(self) -> int:
-        """The time of the last valid data."""
-        assert self.n_data > 0
-        return self.t_at(self.n_data)
+class SemiFoldedDataFlowFormat(DataFlowFormat):
+    pass
 
 
 @set_rt_mode_ann()
 class _SemiFoldedModule(FunctionalModule):
     """Functional modules with interfaces in semi-folded form. Use `build()` of class `HasSemiFoldedIntf`."""
 
-    ostream_attr: SemiFoldedStreamAttr
+    inherent_delay = 1
+    oflow_format: SemiFoldedDataFlowFormat
 
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         raise NotImplementedError
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 37aa6fc9..ae0370e6 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -905,22 +905,26 @@ class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
-        self.ostream_attr = incoming_stream_attr
-        twe = 1 + self.ostream_attr.t_last_vld
+        # For semi-folded linear, the valid output is at only one timestep.
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_last_vld, 1, 1
+        )
+        twe = 1 + self.oflow_format.t_last_vld
 
         ich, ih = self.source[0].shape_out
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(ich, ih, ih, incoming_stream_attr.interval)
+            self._input_buffer_len_check(ich, ih, ih, incoming_flow_format.interval)
+
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
 
-        n_fc = ANNNeuron(
+        n_linear = ANNNeuron(
             self.shape_out,
             self.bias,
             self.bit_trunc,
@@ -930,13 +934,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_linear.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
 
         for i in range(ih):
             neuron = ANNBypassNeuron(
                 shape=(ich, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -954,15 +961,15 @@ def build(
             w = self.weights[ih - i - 1 :: ih, :]
             syn2 = FullConnSyn(
                 neuron,
-                n_fc,
+                n_linear,
                 weights=w,
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
             s_weight.append(syn2)
 
-        generated = [n_fc, *n_delays, *s_delays, *s_weight]
-        self._rebuild_out_intf(network, n_fc, *generated, **build_options)
+        generated = [n_linear, *n_delays, *s_delays, *s_weight]
+        self._rebuild_out_intf(network, n_linear, *generated, **build_options)
 
         return generated
 
@@ -1008,9 +1015,11 @@ def __init__(
         # XXX Do not consider the case when the shape of source neurons needs to be changed, for now.
         # neuron_s.shape_change((in_ch, in_h))
 
-        cout, cin, kh, _ = kernel.shape
+        cout, cin, kh, kw = kernel.shape
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
 
+        assert self.padding[0] < kh and self.padding[1] < kw
+
         if in_ch != cin:
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
@@ -1034,7 +1043,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1047,14 +1056,15 @@ def build(
         _, cin, _, kw = self.kernel.shape
         _, ow = self.shape_out
 
-        self.ostream_attr = SemiFoldedStreamAttr(
-            incoming_stream_attr.t_at(kw - self.padding[0]),
-            incoming_stream_attr.interval * self.stride[1],
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_at_n(kw - self.padding[0]),
+            incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.ostream_attr.t_last_vld
+        twe = 1 + self.oflow_format.t_last_vld
+
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
 
         n_delays = NodeList()
         n_neg_padding = NodeList()
@@ -1072,12 +1082,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_conv2d.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
+
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
@@ -1105,13 +1119,13 @@ def build(
         # Add additional negative padding layer to eliminate the incorrect output
         # NOTE: `t_1st_vld` = 0 & `padding[0]` > 0 means the previous layer is
         # an input node. No need to add negative padding layer for this case.
-        if incoming_stream_attr.t_1st_vld > 0:
+        if incoming_flow_format.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, ih),
-                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
+                    delay=1 + incoming_flow_format.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=incoming_stream_attr.t_1st_vld,
+                    tick_wait_end=incoming_flow_format.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1196,7 +1210,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1209,20 +1223,20 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.ostream_attr = SemiFoldedStreamAttr(
-            incoming_stream_attr.t_at(kw),
-            incoming_stream_attr.interval * self.stride[1],
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_at_n(kw),
+            incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.ostream_attr.t_last_vld
+        twe = 1 + self.oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
 
         n_delays = NodeList()
         s_delays = NodeList()
 
-        pool2d = ANNNeuron(
+        n_pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
@@ -1231,13 +1245,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_pool2d.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
 
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1253,7 +1270,7 @@ def build(
             s_delays.append(syn1)
             syn2 = MaxPoolSyn(
                 neuron,
-                pool2d,
+                n_pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
                     cin, ih, ow, kh, self.stride, (0, 0)
                 ),
@@ -1261,8 +1278,8 @@ def build(
             )
             s_delays.append(syn2)
 
-        generated = [pool2d, *n_delays, *s_delays]
-        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
+        generated = [n_pool2d, *n_delays, *s_delays]
+        self._rebuild_out_intf(network, n_pool2d, *generated, **build_options)
 
         return generated
 
@@ -1302,6 +1319,8 @@ def __init__(
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
         out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
+        kh, kw = self.kernel_size
+        assert self.padding[0] < kh and self.padding[1] < kw
 
         super().__init__(
             neuron_s,
@@ -1314,7 +1333,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1327,15 +1346,15 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.ostream_attr = SemiFoldedStreamAttr(
-            incoming_stream_attr.t_at(kw - self.padding[0]),
-            incoming_stream_attr.interval * self.stride[1],
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_at_n(kw - self.padding[0]),
+            incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.ostream_attr.t_last_vld
+        twe = 1 + self.oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
 
         # NOTE: Division is achieved with the help of output truncation.
         # TODO Since division with a divisor that is an integer power of 2 can only be implemented by
@@ -1355,7 +1374,7 @@ def build(
         s_delays = NodeList()
         s_neg_padding = NodeList()
 
-        pool2d = ANNNeuron(
+        n_pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             bit_trunc=bit_trunc,
@@ -1364,12 +1383,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_pool2d.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
+
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1385,7 +1408,7 @@ def build(
             s_delays.append(syn1)
             syn2 = FullConnSyn(
                 neuron,
-                pool2d,
+                n_pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
                     cin, ih, ow, kh, self.stride, self.padding
                 ),
@@ -1395,13 +1418,13 @@ def build(
             s_delays.append(syn2)
 
         # Add additional negative padding layer to eliminate the incorrect output
-        if incoming_stream_attr.t_1st_vld > 0:
+        if incoming_flow_format.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, ih),
-                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
+                    delay=1 + incoming_flow_format.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=incoming_stream_attr.t_1st_vld,
+                    tick_wait_end=incoming_flow_format.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1418,7 +1441,7 @@ def build(
 
                 syn2 = FullConnSyn(
                     neuron,
-                    pool2d,
+                    n_pool2d,
                     weights=-_poo2d_semifolded_mapping_mask(
                         cin, ih, ow, kh, self.stride, self.padding
                     ),
@@ -1427,8 +1450,8 @@ def build(
                 )
                 s_neg_padding.append(syn2)
 
-        generated = [pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
-        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
+        generated = [n_pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
+        self._rebuild_out_intf(network, n_pool2d, *generated, **build_options)
 
         return generated
 
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index a35fa8b5..7e226a41 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -20,7 +20,7 @@
     get_core_mode,
 )
 
-from paibox.base import NeuDyn
+from paibox.base import DataFlowFormat, NeuDyn, INFINITE_DATAFLOW
 from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
@@ -487,11 +487,13 @@ def __init__(
             ),
         )
 
-        """Auxiliary internal stateful attributes for debugging"""
+        """Non-stateful attributes."""
         self._delay = arg_check_pos(delay, "'delay'")
         self._tws = arg_check_non_neg(tick_wait_start, "'tick_wait_start'")
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
         self._uf = arg_check_pos(unrolling_factor, "'unrolling_factor'")
+        # Default dataflow is infinite and continuous, starting at `tws`.
+        self.oflow_format = DataFlowFormat(self.tick_wait_start)
 
     def __len__(self) -> int:
         return self._n_neuron
@@ -529,6 +531,46 @@ def update(
     def reset_state(self, *args, **kwargs) -> None:
         self.reset_memory()  # Call reset of `StatusMemory`.
 
+    def set_oflow_format(
+        self,
+        t_1st_vld: Optional[int] = None,
+        interval: Optional[int] = None,
+        n_vld: Optional[int] = None
+    ) -> None:
+        assert hasattr(self, "oflow_format")
+        _t_1st_vld = (
+            t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
+        )
+
+        _interval = (
+            arg_check_pos(interval, "interval")
+            if isinstance(interval, int)
+            else self.oflow_format.interval
+        )
+
+        _n_vld = (
+            arg_check_non_neg(n_vld, "n_vld")
+            if isinstance(n_vld, int)
+            else self.oflow_format.n_vld
+        )
+
+        if _t_1st_vld < self.tick_wait_start:
+            raise ValueError(
+                f"the output time of the first valid data should be greater than or equal to "
+                f"{self.tick_wait_start}, but got {_t_1st_vld}."
+            )
+
+        if _n_vld > INFINITE_DATAFLOW:
+            if (t_last_vld := _t_1st_vld + (_n_vld - 1) * _interval) > self.end_tick:
+                raise ValueError(
+                    f"valid data is output after the end time. The neuron stops working at "
+                    f"{self.end_tick}, but still needs to output at {t_last_vld}."
+                )
+
+        self.oflow_format.t_1st_vld = _t_1st_vld
+        self.oflow_format.interval = _interval
+        self.oflow_format.n_vld = _n_vld
+
     def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
         return self.__deepcopy__()
diff --git a/paibox/network.py b/paibox/network.py
index 4f0a4c12..6db83afd 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -7,7 +7,7 @@
 from .base import DynamicSys, SynSys
 from .collector import Collector
 from .components import NeuModule, Neuron, Projection
-from .components._modules import SemiFoldedStreamAttr, _SemiFoldedModule
+from .components._modules import SemiFoldedDataFlowFormat, _SemiFoldedModule
 from .components.modules import BuiltComponentType
 from .exceptions import NotSupportedError
 from .mixin import Container
@@ -102,18 +102,18 @@ def build_modules(
 
         generated = dict()
 
-        # For external input stream info:
-        # 1. The start time is 1
-        # 2. The interval is 1
-        # 3. The #N of data is -1 since it dosen't effect the subsequent output stream.
+        # For external input dataflow:
+        # 1. The start time is 0.
+        # 2. The interval is 1.
+        # 3. The #N of data is `INFINITE_DATA_STREAM` since it dosen't effect the subsequent output dataflow.
         # TODO Reserve an interface for setting the properties of external input from `FRONTEND_ENV`?
-        last_vld_output_attr = SemiFoldedStreamAttr(0, 1)
+        last_vld_output_attr = SemiFoldedDataFlowFormat(t_1st_vld=0)
 
         for m in modules:
             # TODO for the case of the ResBlock, the `pred_dg_semi_ops` will be used.
             if isinstance(m, _SemiFoldedModule):
                 generated[m] = m.build(self, last_vld_output_attr, **build_options)
-                last_vld_output_attr = m.ostream_attr
+                last_vld_output_attr = m.oflow_format
             else:
                 generated[m] = m.build(self, **build_options)
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index c2c63a82..56c80471 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -4,6 +4,7 @@
 import paibox as pb
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
+from paibox.components._modules import _SemiFoldedModule
 from paibox.components.neuron.base import MetaNeuron
 from paibox.components.synapses.conv_utils import _conv2d_faster, _pair, _single
 from paibox.network import DynSysGroup
@@ -976,12 +977,12 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim1.add_probe(probe_linear)
 
-        semi_folded_modules = [*conv2d_list, linear]
+        semi_folded_modules: list[_SemiFoldedModule] = [*conv2d_list, linear]
         # The interval & the time o the first valid data of the external input data stream
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
@@ -1049,7 +1050,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
+                    linear.tick_wait_start + linear.oflow_format.t_last_vld
                 ],
             )
 
@@ -1171,12 +1172,12 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim1.add_probe(probe_linear)
 
-        semi_folded_modules = [*pool2d_list, linear]
+        semi_folded_modules: list[_SemiFoldedModule] = [*pool2d_list, linear]
         # The interval & the time o the first valid data of the external input data stream
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
@@ -1231,7 +1232,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
+                    linear.tick_wait_start + linear.oflow_format.t_last_vld
                 ],
             )
 

From 3122dc6b3878aff7644ed0538d9a515525670da0 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 14:19:58 +0800
Subject: [PATCH 151/187] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20support=20py3.13?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/codecov.yml | 2 +-
 pyproject.toml                | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index 05f9ce7b..35d36c57 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -22,7 +22,7 @@ jobs:
   pytest:
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         os: [ubuntu-latest, windows-latest]
     runs-on: ${{ matrix.os }}
 
diff --git a/pyproject.toml b/pyproject.toml
index 71124dab..08223a17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ homepage = "https://github.com/PAICookers/PAIBox"
 documentation = "https://github.com/PAICookers/PAIBox#readme"
 keywords = ["PAICORE 2.0", "PAIBox", "SNN", "Toolchain"]
 classifiers = [
+    "Development Status :: 4 - Beta",
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
     "Operating System :: OS Independent",
@@ -24,9 +25,9 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    "Topic :: Software Development :: Build Tools",
-    "Topic :: Software Development :: Libraries",
+    "Topic :: Software Development :: Compilers",
 ]
 packages = [{ include = "paibox" }]
 

From 109b74dbe5241774706a0e97424e10ea911892fd Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 14:22:29 +0800
Subject: [PATCH 152/187] =?UTF-8?q?=F0=9F=97=91=EF=B8=8F=20removed=20an=20?=
 =?UTF-8?q?always=20failed=20pre-commit=20hook?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .pre-commit-config.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 490b6d15..b5b4b101 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,6 @@ repos:
       - id: check-symlinks
       - id: check-merge-conflict
       - id: mixed-line-ending
-      - id: name-tests-test
         args: [--pytest-test-first]
       - id: requirements-txt-fixer
       - id: pretty-format-json

From e0f8342a5be7597a6aa826e68d61bb5cd9591b6b Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 15:42:47 +0800
Subject: [PATCH 153/187] =?UTF-8?q?=E2=9C=A8=20feat(base):=20update=20arg?=
 =?UTF-8?q?=20check=20for=20`DataFlowFormat`=20&=20function=20`set=5Foflow?=
 =?UTF-8?q?=5Fformat`=20in=20neuron.=20Add=20test=20cases?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/base.py                   | 54 ++++++++++++++++++++-----
 paibox/components/neuron/base.py | 69 +++++++++++++++++---------------
 tests/test_base.py               | 19 ++++++++-
 3 files changed, 97 insertions(+), 45 deletions(-)

diff --git a/paibox/base.py b/paibox/base.py
index 89ffdbd0..c47260ff 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -257,26 +257,23 @@ def state(self) -> NodeDict:
         return self._memories
 
 
-INFINITE_DATAFLOW = 0  # the dataflow is infinite.
+INFINITE_DATAFLOW = 0
 
 
 @dataclass
 class DataFlowFormat:
     """Describe in detail the format of valid data in the dataflow."""
 
-    t_1st_vld: int
-    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
+    t_1st_vld: int = 0
+    """Global time or a relative time of the first valid data in the dataflow, determined by `is_local_time`."""
     interval: int = 1
-    """The interval of valid data in the flow."""
+    """The interval of valid data in the dataflow."""
     n_vld: int = INFINITE_DATAFLOW
-    """The number of valid data. 0 for infinite dataflow."""
+    """The number of valid data. <=0 for infinite dataflow."""
 
-    def __post_init__(self) -> None:
-        if self.n_vld < INFINITE_DATAFLOW:
-            raise ValueError(
-                f"'n_vld' should be greater than or equal to {INFINITE_DATAFLOW}, "
-                f"but got {self.n_vld}."
-            )
+    is_local_time: bool = True
+    """Whether the `t_1st_vld` is relative to the local time(tws+T) of the neuron, or   \
+        relative to the global time of the external input."""
 
     def t_at_idx(self, idx: int) -> int:
         """The time of the valid data at the given index."""
@@ -295,6 +292,41 @@ def t_last_vld(self) -> int:
         assert self.n_vld > INFINITE_DATAFLOW
         return self.t_at_n(self.n_vld)
 
+    def get_global_t_1st_vld(self, tws: int) -> int:
+        """Get the global time of the first valid data."""
+        return tws + self.t_1st_vld if self.is_local_time else self.t_1st_vld
+
+    def _check_after_assign(self, tws: int, end_tick: int) -> None:
+        _t_1st_vld_out_of_range_text = (
+            "the {0} output time of the first valid data should be in the working "
+            + "time from {1} to {2}, but got {3}."
+        )
+
+        # The global time of the first valid data is in [tws, end_tick].
+        gb_t_1st_vld = self.get_global_t_1st_vld(tws)
+        if gb_t_1st_vld < tws or gb_t_1st_vld > end_tick:
+            if self.is_local_time:
+                raise ValueError(
+                    _t_1st_vld_out_of_range_text.format(
+                        "local", "+0", f"+{end_tick - tws + 1}", self.t_1st_vld
+                    )
+                )
+            else:
+                raise ValueError(
+                    _t_1st_vld_out_of_range_text.format(
+                        "global", tws, end_tick, self.t_1st_vld
+                    )
+                )
+
+        if self.n_vld > INFINITE_DATAFLOW:
+            if (
+                t_last_vld := gb_t_1st_vld + (self.n_vld - 1) * self.interval
+            ) > end_tick:
+                raise ValueError(
+                    f"valid data is output after the end time. The neuron stops working at "
+                    f"{end_tick}, but still needs to output at {t_last_vld}."
+                )
+
 
 class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
 
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 7e226a41..14def12f 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -20,7 +20,7 @@
     get_core_mode,
 )
 
-from paibox.base import DataFlowFormat, NeuDyn, INFINITE_DATAFLOW
+from paibox.base import DataFlowFormat, NeuDyn
 from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
@@ -492,8 +492,8 @@ def __init__(
         self._tws = arg_check_non_neg(tick_wait_start, "'tick_wait_start'")
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
         self._uf = arg_check_pos(unrolling_factor, "'unrolling_factor'")
-        # Default dataflow is infinite and continuous, starting at `tws`.
-        self.oflow_format = DataFlowFormat(self.tick_wait_start)
+        # Default dataflow is infinite and continuous, starting at tws+0.
+        self.oflow_format = DataFlowFormat(0, is_local_time=True)
 
     def __len__(self) -> int:
         return self._n_neuron
@@ -535,41 +535,44 @@ def set_oflow_format(
         self,
         t_1st_vld: Optional[int] = None,
         interval: Optional[int] = None,
-        n_vld: Optional[int] = None
+        n_vld: Optional[int] = None,
+        *,
+        format_type: type[DataFlowFormat] = DataFlowFormat,
     ) -> None:
-        assert hasattr(self, "oflow_format")
-        _t_1st_vld = (
-            t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
-        )
-
-        _interval = (
-            arg_check_pos(interval, "interval")
-            if isinstance(interval, int)
-            else self.oflow_format.interval
-        )
-
-        _n_vld = (
-            arg_check_non_neg(n_vld, "n_vld")
-            if isinstance(n_vld, int)
-            else self.oflow_format.n_vld
-        )
-
-        if _t_1st_vld < self.tick_wait_start:
-            raise ValueError(
-                f"the output time of the first valid data should be greater than or equal to "
-                f"{self.tick_wait_start}, but got {_t_1st_vld}."
+        """Set the attributes of output dataflow format by given arguments."""
+        if hasattr(self, "oflow_format"):
+            _t_1st_vld = (
+                t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
             )
-
-        if _n_vld > INFINITE_DATAFLOW:
-            if (t_last_vld := _t_1st_vld + (_n_vld - 1) * _interval) > self.end_tick:
+            _interval = (
+                arg_check_pos(interval, "interval")
+                if isinstance(interval, int)
+                else self.oflow_format.interval
+            )
+            _n_vld = (
+                arg_check_non_neg(n_vld, "n_vld")
+                if isinstance(n_vld, int)
+                else self.oflow_format.n_vld
+            )
+            self._assign_flow_format(_t_1st_vld, _interval, _n_vld)
+        else:
+            if not (
+                isinstance(interval, int)
+                and isinstance(n_vld, int)
+                and isinstance(t_1st_vld, int)
+            ):
                 raise ValueError(
-                    f"valid data is output after the end time. The neuron stops working at "
-                    f"{self.end_tick}, but still needs to output at {t_last_vld}."
+                    "if 'oflow_format' is not set, 't_1st_vld', 'interval' & 'n_vld' must be set."
                 )
 
-        self.oflow_format.t_1st_vld = _t_1st_vld
-        self.oflow_format.interval = _interval
-        self.oflow_format.n_vld = _n_vld
+            self.oflow_format = format_type(t_1st_vld, interval, n_vld)
+            self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
+
+    def _assign_flow_format(self, t_1st_vld: int, intv: int, n_vld: int) -> None:
+        self.oflow_format.t_1st_vld = t_1st_vld
+        self.oflow_format.interval = intv
+        self.oflow_format.n_vld = n_vld
+        self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
 
     def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
diff --git a/tests/test_base.py b/tests/test_base.py
index 5adb2c7f..cd55346d 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1,7 +1,7 @@
 import pytest
 
 import paibox as pb
-from paibox.base import PAIBoxObject
+from paibox.base import PAIBoxObject, DataFlowFormat
 from paibox.exceptions import RegisterError
 
 
@@ -41,3 +41,20 @@ def test_paiboxobject_nodes():
 
     nodes4 = obj1.nodes(method="absolute", level=-1, include_self=True)
     assert nodes4["obj111"] == obj1
+
+
+class TestDataFlowFormat:
+    def test_dff_infinite_dataflow(self):
+        with pytest.raises((AssertionError, ValueError)):
+            dff = DataFlowFormat(1, 0, -1)
+            _ = dff.t_last_vld
+
+    def test_dff_valid(self):
+        # 1. t1 >= tws, t_last > endtick
+        dff1 = DataFlowFormat(10, 3, 10, is_local_time=False)
+        with pytest.raises(ValueError):
+            dff1._check_after_assign(8, 36)
+
+        # 2. t1 >= tws, t_last <= endtick
+        dff2 = DataFlowFormat(10, 3, 10, is_local_time=True)
+        dff2._check_after_assign(2, 39)

From 870e254554abbd72c8fd16d5f90f997cac128053 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 15:48:51 +0800
Subject: [PATCH 154/187] =?UTF-8?q?=E2=9C=A8=20feat(graphs):=20corrected?=
 =?UTF-8?q?=20the=20calculation=20method=20of=20attribute=20`inherent=5Fti?=
 =?UTF-8?q?mestep`.=20Strictly=20annotate=20the=20data=20flow=20format=20f?=
 =?UTF-8?q?or=20the=20computational=20neuron=20of=20the=20semi-folded=20op?=
 =?UTF-8?q?s.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py            | 20 +++++++++++++-------
 paibox/components/functional.py     | 16 ++++++++++++----
 tests/components/test_functional.py | 29 +++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index c3e2751d..0f1f68ad 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -127,7 +127,7 @@ def _pre_build(self, **build_options) -> None:
                 # checks. These additional checks may be removed as more network structures will be supported.
 
                 # Currently, `LinearSemiFolded` is at the end of the network, since it will change the form of
-                # the input data stream, and its effective output is at the same time.
+                # the input dataflow, and its effective output is at the same time.
                 semi_linears = modules.subset(LinearSemiFolded)
                 if not all(
                     len(succ_dg_semi_ops[linear]) == 0 for linear in semi_linears
@@ -172,9 +172,14 @@ def _update_graph(self, **build_options) -> None:
         self.inodes = self._raw_nodes.subset(InputProj)
 
         # By default, nodes with out-degree = 0 are considered as output nodes.
-        self.onodes = self._raw_nodes.key_on_condition(
-            lambda node: self.degree_of_nodes[node].out_degree == 0
-        )  # type: ignore
+        # TODO A node with non-zero out-degree can also be an output node. However, no network has this topology for now.
+        self.onodes = Collector(
+            {
+                k: cast(DestNodeType, v)
+                for k, v in self._raw_nodes.items()
+                if self.degree_of_nodes[k].out_degree == 0
+            }
+        ).not_subset(InputProj)
 
         for name, node in self._raw_nodes.items():
             self.nodes[name] = NodeAttr(
@@ -525,9 +530,10 @@ def _find_rg_by_cb(
     @property
     def inherent_timestep(self) -> int:
         self.build_check()
-        _, distance = get_longest_path(self.succ_dg, self.ordered_nodes)
-
-        return distance
+        return max(
+            n.oflow_format.get_global_t_1st_vld(n.tick_wait_start)
+            for n in self.onodes.values()
+        )
 
     @property
     def graph_name_repr(self) -> str:
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index ae0370e6..6755cc5e 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -935,7 +935,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_linear.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(ih):
@@ -1083,7 +1085,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_conv2d.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(kw):
@@ -1246,7 +1250,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_pool2d.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(kw):
@@ -1384,7 +1390,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_pool2d.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(kw):
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 56c80471..4aae8961 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -1043,6 +1043,16 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                         ],
                     )
 
+                    assert conv2d_list[i_conv].tick_wait_start + t_1st_vld_data[
+                        i_conv
+                    ] + i * semi_vld_out_intv[i_conv] - 1 == conv2d_list[
+                        i_conv
+                    ].tick_wait_start + conv2d_list[
+                        i_conv
+                    ].oflow_format.t_at_idx(
+                        i
+                    )
+
             # x is the reference result of the last convolution.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
@@ -1053,6 +1063,10 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                     linear.tick_wait_start + linear.oflow_format.t_last_vld
                 ],
             )
+            assert (
+                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
+                == linear.tick_wait_start + linear.oflow_format.t_last_vld
+            )
 
     @pytest.mark.parametrize(
         "ishape_chw, n_pool, kshape_hw, stride, padding, out_features, pool_type",
@@ -1225,6 +1239,16 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                         ],
                     )
 
+                    assert pool2d_list[i_pool].tick_wait_start + t_1st_vld_data[
+                        i_pool
+                    ] + i * semi_vld_out_intv[i_pool] - 1 == pool2d_list[
+                        i_pool
+                    ].tick_wait_start + pool2d_list[
+                        i_pool
+                    ].oflow_format.t_at_idx(
+                        i
+                    )
+
             # x is the reference result of the last pooling.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
@@ -1236,6 +1260,11 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                 ],
             )
 
+            assert (
+                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
+                == linear.tick_wait_start + linear.oflow_format.t_last_vld
+            )
+
     @pytest.mark.parametrize(
         "shape, weight",
         [

From c81ffd18886efb41cf7048270c26074350b55c2a Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 15:50:20 +0800
Subject: [PATCH 155/187] =?UTF-8?q?=F0=9F=8F=B7=EF=B8=8F=20typing(neuron):?=
 =?UTF-8?q?=20add=20typed=20dict=20`ExtraNeuAttrKwds`=20to=20check=20extra?=
 =?UTF-8?q?=20keywords=20passing=20to=20neurons?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/modules.py        |  4 ++--
 paibox/components/neuron/base.py    | 10 +++++-----
 paibox/components/neuron/neurons.py | 25 +++++++++++++++----------
 paibox/components/neuron/utils.py   | 24 ++++++++++++++++++++++--
 4 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 41e06fdc..2c13edef 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -14,7 +14,7 @@
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import _input_width_format, _RTModeKwds, _spike_width_format
+from .neuron.utils import _input_width_format, RTModeKwds, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
@@ -91,7 +91,7 @@ class NeuModule(NeuDyn, BuildingModule):
     """#N of outputs."""
     inherent_delay: int = 0
     """Internal delay of the module, relative to the external."""
-    rt_mode_kwds: _RTModeKwds
+    rt_mode_kwds: RTModeKwds
     mode: CoreMode
 
     def __init__(
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 14def12f..bef9976b 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -21,7 +21,7 @@
 )
 
 from paibox.base import DataFlowFormat, NeuDyn
-from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
+from paibox.exceptions import ConfigInvalidError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
@@ -44,7 +44,7 @@
     _input_width_format,
     _leak_v_check,
     _mask,
-    _RTModeKwds,
+    RTModeKwds,
     _spike_width_format,
     vjt_overflow,
 )
@@ -57,7 +57,7 @@
 class MetaNeuron:
     """Meta neuron"""
 
-    rt_mode_kwds: _RTModeKwds
+    rt_mode_kwds: RTModeKwds
     mode: CoreMode
 
     def __init__(
@@ -96,8 +96,8 @@ def __init__(
         # check whether the mode is valid
         self.mode = get_core_mode(input_width, spike_width, snn_en)
 
-        if pool_max == True and self.mode != CoreMode.MODE_ANN:
-            raise NotSupportedError(
+        if pool_max and self.mode != CoreMode.MODE_ANN:
+            raise ConfigInvalidError(
                 f"max pooling is only supported in {CoreMode.MODE_ANN.name}, "
                 f"but got {self.mode.name}."
             )
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 85e3df47..3f7f0279 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -8,10 +8,15 @@
 from paibox.types import LEAK_V_DTYPE, DataType, Shape
 
 from .base import Neuron
-from .utils import LEAK_V_MAX
+from .utils import LEAK_V_MAX, ExtraNeuAttrKwds
+
+if sys.version_info >= (3, 12):
+    from typing import Unpack
+else:
+    from typing_extensions import Unpack
 
 if sys.version_info >= (3, 13):
-    from typing import deprecated
+    from warnings import deprecated
 else:
     from typing_extensions import deprecated
 
@@ -37,7 +42,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """IF neuron.
 
@@ -93,7 +98,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """LIF neuron.
 
@@ -152,7 +157,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """Tonic spiking neuron.
 
@@ -178,7 +183,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """Phasic spiking neuron. Once the neuron receives `N` spikes and fires, it will reset to   \
             the negative floor and never fires again. `N` is `fire_step`.
@@ -213,7 +218,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """A neuron that always outputs 1 as long as it starts working.
 
@@ -245,7 +250,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """Bypass neuron. Output is equal to input.
 
@@ -279,7 +284,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """General neuron used in ANN mode. Positive threshold = 1, negative threshold = 0."""
         kwargs["bit_truncation"] = bit_trunc
@@ -299,7 +304,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         super().__init__(
             shape, bias=0, bit_trunc=8, keep_shape=keep_shape, name=name, **kwargs
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index 1d9ea2ff..dd58006f 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -2,7 +2,12 @@
 from typing import Literal, TypedDict, Union
 
 import numpy as np
-from paicorelib import InputWidthFormat, SNNModeEnable, SpikeWidthFormat
+from paicorelib import (
+    InputWidthFormat,
+    SNNModeEnable,
+    SpikeWidthFormat,
+    MaxPoolingEnable,
+)
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
     BIT_TRUNCATE_MAX,
@@ -107,9 +112,24 @@ def _get_neu_out_dtype(
         return NEUOUT_U8_DTYPE
 
 
-class _RTModeKwds(TypedDict):
+class RTModeKwds(TypedDict):
     """A typed keywords for runtime mode. Only for checking if necessary."""
 
     input_width: InputWidthFormat
     spike_width: SpikeWidthFormat
     snn_en: SNNModeEnable
+
+
+class ExtraNeuAttrKwds(TypedDict, total=False):
+    """A typed keywords for extra neuron attributes."""
+
+    bit_truncation: int  # For ANNNeuron
+    delay: int
+    tick_wait_start: int
+    tick_wait_end: int
+    input_width: Union[L[1, 8], InputWidthFormat]
+    spike_width: Union[L[1, 8], SpikeWidthFormat]
+    snn_en: Union[bool, SNNModeEnable]
+    pool_max: Union[bool, MaxPoolingEnable]
+    unrolling_factor: int
+    overflow_strict: bool

From 2437357f6014170d8d1d871775266ec8cc2a6b79 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 16:05:46 +0800
Subject: [PATCH 156/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix(compile):=20if=20?=
 =?UTF-8?q?compiling=20with=20`core=5Festimate=5Fonly`=20turned=20on,=20pr?=
 =?UTF-8?q?event=20exporting=20the=20compiled=20results?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/mapper.py | 19 ++++++++++++++++---
 paibox/exceptions.py     |  6 ++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 6012d7fc..b91e2814 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -8,7 +8,7 @@
 
 from paibox.base import SynSys
 from paibox.components import Neuron
-from paibox.exceptions import ConfigInvalidError, ResourceError
+from paibox.exceptions import CompileError, ConfigInvalidError, ResourceError
 from paibox.network import DynSysGroup
 
 from .conf_exporting import *
@@ -71,6 +71,9 @@ def __init__(self) -> None:
             chip_list=_BACKEND_CONTEXT["target_chip_addr"]
         )
 
+        self._core_estimate_only = False
+        """Whether this compilation is for core estimation only. If so, no core will be assigned."""
+
         self.clear()
 
     def clear(self) -> None:
@@ -90,6 +93,8 @@ def clear(self) -> None:
         self.n_core_required = 0
         self.n_core_occupied = 0
 
+        self._core_estimate_only = False
+
         # Set default cflags
         _BACKEND_CONTEXT.cflags.clear()
         set_cflag(enable_wp_opt=True)
@@ -169,6 +174,8 @@ def compile(
             set_cflag(multicast_optim=True)
             set_cflag(multicast_optim_nodes=_mul_optim_nodes)
 
+        self._core_estimate_only = core_estimate_only
+
         """Preperation.
             1. Check whether the PAIGraph has built.
             2. Set global compilation flags.
@@ -192,9 +199,9 @@ def compile(
         self.cb_axon_grouping()
 
         """Core coordinate assignment."""
-        self.coord_assign(core_estimate_only)
+        self.coord_assign(self._core_estimate_only)
 
-        if core_estimate_only:
+        if self._core_estimate_only:
             return GraphInfo(
                 name=self.graph.graph_name_repr,
                 input={},
@@ -619,6 +626,12 @@ def export(
 
         Return: total configurations in dictionary format.
         """
+        if self._core_estimate_only:
+            raise CompileError(
+                "the current compilation is only for core estimation. "
+                "Please disable 'core_estimate_only' and compile again before exporting."
+            )
+
         if format not in ("bin", "npy", "txt"):
             raise ValueError(f"format {format} is not supported.")
 
diff --git a/paibox/exceptions.py b/paibox/exceptions.py
index 55514a43..bcc4d32a 100644
--- a/paibox/exceptions.py
+++ b/paibox/exceptions.py
@@ -74,6 +74,12 @@ class FunctionalError(PAIBoxError, RuntimeError):
     pass
 
 
+class CompileError(PAIBoxError, RuntimeError):
+    """Exception for compilation."""
+
+    pass
+
+
 class RoutingError(PAIBoxError):
     """Exception for routing tree."""
 

From 1e0451fce217a4f322d4a4ba3972ca099fa1aabb Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 16:16:14 +0800
Subject: [PATCH 157/187] =?UTF-8?q?=F0=9F=9A=B8=20typing:=20update=20typin?=
 =?UTF-8?q?g=20&=20error=20handling.=20Removed=20useless=20types?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py    | 66 +++++++++++++++++++------------------
 paibox/backend/mapper.py    | 27 +++++++--------
 paibox/backend/placement.py |  8 +++--
 paibox/backend/routing.py   |  9 ++---
 paibox/backend/types.py     | 13 +++-----
 paibox/exceptions.py        |  8 ++++-
 paibox/utils.py             | 16 ++++-----
 7 files changed, 74 insertions(+), 73 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 0f1f68ad..69a117e7 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -2,14 +2,18 @@
 from collections import defaultdict
 from collections.abc import Iterable, Mapping, Sequence
 from dataclasses import dataclass, field
-from typing import Any, TypeVar, Union
+from typing import Any, TypeVar, Union, cast
 
 from paicorelib import HwConfig
 
 from paibox.collector import Collector
 from paibox.components import FullConnectedSyn, InputProj, NeuModule, Neuron
 from paibox.components.functional import LinearSemiFolded
-from paibox.exceptions import GraphBuildError, GraphConnectionError, NotSupportedError
+from paibox.exceptions import (
+    GraphBuildError,
+    GraphConnectionError,
+    GraphNotSupportedError,
+)
 from paibox.network import DynSysGroup
 from paibox.utils import check_elem_unique
 
@@ -132,7 +136,7 @@ def _pre_build(self, **build_options) -> None:
                 if not all(
                     len(succ_dg_semi_ops[linear]) == 0 for linear in semi_linears
                 ):
-                    raise NotSupportedError(
+                    raise GraphNotSupportedError(
                         "currently, the semi-folded linear can only be used as output of the network."
                     )
 
@@ -183,9 +187,7 @@ def _update_graph(self, **build_options) -> None:
 
         for name, node in self._raw_nodes.items():
             self.nodes[name] = NodeAttr(
-                node=node,
-                position=self._node_pos(name),
-                degree=self.degree_of_nodes[name],
+                node, self._node_pos(name), self.degree_of_nodes[name]
             )
 
         self.ordered_nodes = toposort(self.succ_dg)
@@ -220,7 +222,7 @@ def topo_support_check(self) -> None:
             onode.num_out > HwConfig.N_FANIN_PER_DENDRITE_MAX
             for onode in self.onodes.values()
         ):
-            raise NotSupportedError(
+            raise GraphNotSupportedError(
                 f"only output nodes with no more than {HwConfig.N_FANIN_PER_DENDRITE_MAX} "
                 f"neurons are supported."
             )
@@ -541,7 +543,7 @@ def graph_name_repr(self) -> str:
         return _prefix + "_and_".join(network.name for network in self._raw_networks)
 
 
-_NT = TypeVar("_NT", CoreBlock, NodeName, RoutingGroup)
+_NT = TypeVar("_NT", CoreBlock, NodeName, RoutingGroup, MergedSuccGroup)
 _T = TypeVar("_T")
 
 
@@ -557,7 +559,7 @@ def _degree_check(
                     if isinstance(succ_node, CoreBlock)
                     else str(succ_node)
                 )
-                raise NotSupportedError(
+                raise GraphNotSupportedError(
                     f"If out-degree of a node is greater than 1, the in-degree of its sucessors must be 1. "
                     f"However, in-degree of {_node_repr} is {degree_of_nodes[succ_node].in_degree}."
                 )
@@ -570,7 +572,7 @@ def find_cycles(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[list[_NT]]:
     stack_set: set[_NT] = set()  # 方便快速检查路径中的节点
 
     # 深度优先搜索的辅助函数
-    def dfs(node: _NT):
+    def dfs(node: _NT) -> None:
         if node in stack_set:  # 检测到环
             cycle_start_index = stack.index(node)
             cycles.append(stack[cycle_start_index:])
@@ -596,45 +598,45 @@ def dfs(node: _NT):
     return cycles
 
 
-def merge_overlap(groups: Iterable[Iterable[_NT]]) -> list[list[_NT]]:
+def merge_overlap(groups: Iterable[Sequence[_NT]]) -> list[list[_NT]]:
     # 并查集数据结构
     parent: dict[_NT, _NT] = dict()
 
     # 查找集合的根节点
-    def find(x):
+    def find(x: _NT) -> _NT:
         if parent[x] != x:
             parent[x] = find(parent[x])
+
         return parent[x]
 
     # 合并两个集合
-    def union(x, y):
-        rootX = find(x)
-        rootY = find(y)
-        if rootX != rootY:
-            parent[rootY] = rootX
+    def union(x, y) -> None:
+        rootx = find(x)
+        rooty = find(y)
+        if rootx != rooty:
+            parent[rooty] = rootx
 
     # 初始化并查集
     for group in groups:
-        for element in group:
-            if element not in parent:
-                parent[element] = element
+        for elem in group:
+            if elem not in parent:
+                parent[elem] = elem
 
     # 合并所有相互重叠的环
     for group in groups:
-        first_element = group[0]
-        for element in group[1:]:
-            union(first_element, element)
+        first_elem = group[0]
+        for elem in group[1:]:
+            union(first_elem, elem)
 
     # 根据并查集结果，将所有节点归类到同一个集合中
-    merged_groups: dict[_NT, list[_NT]] = dict()
-    for element in parent:
-        root = find(element)
-        if root not in merged_groups:
-            merged_groups[root] = []
-        merged_groups[root].append(element)
+    mgrps: dict[_NT, list[_NT]] = dict()
+    for elem in parent:
+        root = find(elem)
+        if root not in mgrps:
+            mgrps[root] = []
+        mgrps[root].append(elem)
 
-    # 将结果转换为列表列表形式
-    return list(merged_groups.values())
+    return list(mgrps.values())
 
 
 def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
@@ -691,7 +693,7 @@ def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
                 vertices.add(m)
 
     if any(incoming_edges.get(v, None) for v in directed_edges):
-        raise NotSupportedError("the graph with cycles is not supported.")
+        raise GraphNotSupportedError("the graph with cycles is not supported.")
 
     return ordered
 
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index b91e2814..e0bfb715 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -45,10 +45,11 @@
 
 
 class Mapper:
-    graph = PAIGraph()
+    graph: PAIGraph
     graph_info: GraphInfo
 
     def __init__(self) -> None:
+        self.graph = PAIGraph()
         self.core_blocks: list[CoreBlock] = []
         """List for core blocks in the network."""
         self.succ_core_blocks: dict[CoreBlock, list[CoreBlock]] = defaultdict(list)
@@ -381,7 +382,7 @@ def config_export(self) -> GraphInfo:
         ]:
             raise ConfigInvalidError(
                 f"the output chip address {ochip_coord} should not overlap with the "
-                f"chip addresses, but got {_BACKEND_CONTEXT._target_chip_addr_repr()}."
+                f"target chip addresses, but got {_BACKEND_CONTEXT._target_chip_addr_repr()}."
             )
 
         input_nodes_info = self._inpproj_config_export()
@@ -704,27 +705,27 @@ def _find_dest_cb_by_nseg(
         return dest_cb_of_nseg
 
 
-def cycle_merge(merged_sgrps: list[MergedSuccGroup]):
-    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = dict()
+def cycle_merge(merged_sgrps: list[MergedSuccGroup]) -> list[MergedSuccGroup]:
+    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = defaultdict(list)
+
     for msgrp in merged_sgrps:
-        succ_merged_sgrps[msgrp] = []
-        nodes = set(msgrp.nodes)
         for _msgrp in merged_sgrps:
             if msgrp == _msgrp:
                 continue
-            if not nodes.isdisjoint(_msgrp.input_nodes):
+            if not msgrp.nodes.isdisjoint(_msgrp.input_nodes):
                 succ_merged_sgrps[msgrp].append(_msgrp)
 
     cycles: list[list[MergedSuccGroup]] = find_cycles(succ_merged_sgrps)
     merged_cycles: list[list[MergedSuccGroup]] = merge_overlap(cycles)
 
     processed_merged_cycles: list[MergedSuccGroup] = list()
-    remaining_merged_sgrps: set[MergedSuccGroup] = set(merged_sgrps)
-    for merged_cycle in merged_cycles:
-        processed_merged_cycles.append(MergedSuccGroup.merge(merged_cycle))
-        for msgrp in merged_cycle:
-            remaining_merged_sgrps.remove(msgrp)
-    processed_merged_cycles.extend(remaining_merged_sgrps)
+    remaining_msgrps: set[MergedSuccGroup] = set(merged_sgrps)
+    for mc in merged_cycles:
+        processed_merged_cycles.append(MergedSuccGroup.merge(mc))
+        for msgrp in mc:
+            remaining_msgrps.remove(msgrp)
+
+    processed_merged_cycles.extend(remaining_msgrps)
     return processed_merged_cycles
 
 
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index e77e0035..3fc370b9 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -1,6 +1,6 @@
 import math
 import warnings
-from typing import ClassVar, Literal, Optional, overload
+from typing import ClassVar, Literal, Optional, cast, overload
 
 import numpy as np
 from paicorelib import LCN_EX, ChipCoord, Coord, CoreMode, HwConfig, MaxPoolingEnable
@@ -177,7 +177,9 @@ def shape(self) -> tuple[int, int]:
     @property
     def source(self) -> list[SourceNodeType]:
         """Ordered unique source nodes."""
-        return list(set([parent.source for parent in self.obj]))
+        return cast(
+            list[SourceNodeType], list(set([parent.source for parent in self.obj]))
+        )
 
     @property
     def axons(self) -> list[SourceNodeType]:
@@ -186,7 +188,7 @@ def axons(self) -> list[SourceNodeType]:
     @property
     def dest(self) -> list[DestNodeType]:
         """Ordered unique destination nodes."""
-        return list(set([parent.dest for parent in self.obj]))
+        return cast(list[DestNodeType], list(set([parent.dest for parent in self.obj])))
 
     def n_axon_of(self, index: int) -> int:
         """Get the #N of axons of `index`-th source neuron."""
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 9154d185..2e854d6d 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -11,19 +11,14 @@
 from paicorelib import RoutingLevel as Level
 from paicorelib.routing_defs import MAX_ROUTING_PATH_LENGTH
 
-from paibox.exceptions import (
-    GraphBuildError,
-    PAIBoxDeprecationWarning,
-    ResourceError,
-    RoutingError,
-)
+from paibox.exceptions import PAIBoxDeprecationWarning, ResourceError, RoutingError
 
 from .conf_types import CorePlmConfInChip
 from .placement import CoreBlock, EmptyCorePlacement
 from .types import *
 
 if sys.version_info >= (3, 13):
-    from typing import deprecated
+    from warnings import deprecated
 else:
     from typing_extensions import deprecated
 
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 778e3853..b8f34f3b 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -30,7 +30,6 @@
     "NodeDegree",
     "NodeAttr",
     "EdgeAttr",
-    "PartitionedEdges",
     "NeuSlice",
     "NeuSegment",
     "NeuSegOfCorePlm",
@@ -88,23 +87,19 @@ def copy(self) -> "NodeDegree":
         return self.__deepcopy__()
 
 
-class NodeAttr(NamedTuple):
+@dataclass
+class NodeAttr:
     node: NodeType
     position: NodePosition
     degree: NodeDegree
 
 
-class EdgeAttr(NamedTuple):
+@dataclass
+class EdgeAttr:  # TODO FIXME distance?
     edge: EdgeType
     distance: int
 
 
-class PartitionedEdges(NamedTuple):
-    edges: set[EdgeType]
-    rg_id: int
-    rt_mode: CoreMode = CoreMode.MODE_SNN  # XXX Temp solution
-
-
 NeuSlice: TypeAlias = slice
 
 
diff --git a/paibox/exceptions.py b/paibox/exceptions.py
index bcc4d32a..e5204ad0 100644
--- a/paibox/exceptions.py
+++ b/paibox/exceptions.py
@@ -57,7 +57,13 @@ class GraphConnectionError(GraphBuildError):
 
 
 class NotSupportedError(PAIBoxError, NotImplementedError):
-    """Exception for a certain function not supported."""
+    """Exception for unsupported functions."""
+
+    pass
+
+
+class GraphNotSupportedError(GraphBuildError, NotSupportedError):
+    """Exception for unsupported structures of graph."""
 
     pass
 
diff --git a/paibox/utils.py b/paibox/utils.py
index 6c43864b..906933cf 100644
--- a/paibox/utils.py
+++ b/paibox/utils.py
@@ -155,33 +155,33 @@ def reverse_16bit(x: int) -> int:
     return ((x >> 8) | (x << 8)) & 0xFFFF
 
 
+def _get_desc(desc: Optional[str] = None) -> str:
+    return "value" if desc is None else desc
+
+
 def arg_check_pos(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg < 1:
-        raise ValueError(f"{_desc} must be positive, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be positive, but got {arg}.")
 
     return arg
 
 
 def arg_check_non_pos(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg > 0:
-        raise ValueError(f"{_desc} must be non-positive, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be non-positive, but got {arg}.")
 
     return arg
 
 
 def arg_check_neg(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg > -1:
-        raise ValueError(f"{_desc} must be negative, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be negative, but got {arg}.")
 
     return arg
 
 
 def arg_check_non_neg(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg < 0:
-        raise ValueError(f"{_desc} must be non-negative, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be non-negative, but got {arg}.")
 
     return arg

From 72507079ee700921534682ff0a684b321a1bf15d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Dec 2024 08:19:13 +0000
Subject: [PATCH 158/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/base.py                    | 2 +-
 paibox/components/modules.py      | 2 +-
 paibox/components/neuron/base.py  | 2 +-
 paibox/components/neuron/utils.py | 2 +-
 tests/test_base.py                | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/paibox/base.py b/paibox/base.py
index c47260ff..fbfe3959 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -1,5 +1,5 @@
-from dataclasses import dataclass
 import sys
+from dataclasses import dataclass
 from typing import Any, ClassVar, Literal, Optional
 
 import numpy as np
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 2c13edef..703c1a00 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -14,7 +14,7 @@
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import _input_width_format, RTModeKwds, _spike_width_format
+from .neuron.utils import RTModeKwds, _input_width_format, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index bef9976b..635ac3f6 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -41,10 +41,10 @@
 from .utils import (
     BIT_TRUNCATE_MAX,
     NEG_THRES_MIN,
+    RTModeKwds,
     _input_width_format,
     _leak_v_check,
     _mask,
-    RTModeKwds,
     _spike_width_format,
     vjt_overflow,
 )
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index dd58006f..349ea050 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -4,9 +4,9 @@
 import numpy as np
 from paicorelib import (
     InputWidthFormat,
+    MaxPoolingEnable,
     SNNModeEnable,
     SpikeWidthFormat,
-    MaxPoolingEnable,
 )
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
diff --git a/tests/test_base.py b/tests/test_base.py
index cd55346d..efe9a2ef 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1,7 +1,7 @@
 import pytest
 
 import paibox as pb
-from paibox.base import PAIBoxObject, DataFlowFormat
+from paibox.base import DataFlowFormat, PAIBoxObject
 from paibox.exceptions import RegisterError
 
 

From efc09c2e66ea72c41d9fb5c4c18fa1ffc444dbcc Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 14:16:04 +0800
Subject: [PATCH 159/187] =?UTF-8?q?=E2=9C=A8=20feat(base):=20use=20`DataSt?=
 =?UTF-8?q?reamFormat`=20to=20descriibe=20the=20format=20of=20dataflow.=20?=
 =?UTF-8?q?Update=20the=20update=20logic=20of=20dataflow=20format=20betwee?=
 =?UTF-8?q?n=20semi-folded=20ops.=20Labeling=20the=20dataflow=20format=20o?=
 =?UTF-8?q?n=20neurons?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/base.py                      |  51 ++++++++++++
 paibox/components/_modules.py       |  36 ++------
 paibox/components/functional.py     | 122 ++++++++++++++++------------
 paibox/components/neuron/base.py    |  46 ++++++++++-
 paibox/network.py                   |  14 ++--
 tests/components/test_functional.py |  13 +--
 6 files changed, 187 insertions(+), 95 deletions(-)

diff --git a/paibox/base.py b/paibox/base.py
index 31e25387..89ffdbd0 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 import sys
 from typing import Any, ClassVar, Literal, Optional
 
@@ -256,6 +257,45 @@ def state(self) -> NodeDict:
         return self._memories
 
 
+INFINITE_DATAFLOW = 0  # the dataflow is infinite.
+
+
+@dataclass
+class DataFlowFormat:
+    """Describe in detail the format of valid data in the dataflow."""
+
+    t_1st_vld: int
+    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
+    interval: int = 1
+    """The interval of valid data in the flow."""
+    n_vld: int = INFINITE_DATAFLOW
+    """The number of valid data. 0 for infinite dataflow."""
+
+    def __post_init__(self) -> None:
+        if self.n_vld < INFINITE_DATAFLOW:
+            raise ValueError(
+                f"'n_vld' should be greater than or equal to {INFINITE_DATAFLOW}, "
+                f"but got {self.n_vld}."
+            )
+
+    def t_at_idx(self, idx: int) -> int:
+        """The time of the valid data at the given index."""
+        if self.n_vld > INFINITE_DATAFLOW:
+            assert 0 <= idx <= self.n_vld - 1
+
+        return self.t_1st_vld + idx * self.interval
+
+    def t_at_n(self, n: int) -> int:
+        """The time of the n-th valid data."""
+        return self.t_at_idx(n - 1)
+
+    @property
+    def t_last_vld(self) -> int:
+        """The time of the last valid data."""
+        assert self.n_vld > INFINITE_DATAFLOW
+        return self.t_at_n(self.n_vld)
+
+
 class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
 
     _delay: int
@@ -266,6 +306,9 @@ class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
     _uf: int
     """unrolling_factor"""
 
+    oflow_format: DataFlowFormat
+    """The format of output data stream"""
+
     def __init__(self, name: Optional[str] = None) -> None:
         super().__init__(name)
         self.master_nodes = NodeDict()
@@ -291,6 +334,14 @@ def tick_wait_end(self) -> int:
     def unrolling_factor(self) -> int:
         return self._uf
 
+    @property
+    def end_tick(self) -> int:
+        """End time of work."""
+        if self.tick_wait_end == 0:
+            return 9999  # Never end
+
+        return self.tick_wait_start + self.tick_wait_end - 1
+
     @unrolling_factor.setter
     def unrolling_factor(self, factor: int) -> None:
         self._uf = arg_check_pos(factor, "'unrolling_factor'")
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 30ca7397..a2a0953d 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,12 +1,10 @@
-import math
 import typing
-from dataclasses import dataclass
 from typing import Literal, Optional, Union
 
 import numpy as np
 from paicorelib import TM, HwConfig
 
-from paibox.base import NeuDyn, NodeList
+from paibox.base import DataFlowFormat, NeuDyn, NodeList
 from paibox.exceptions import ResourceError, ShapeError
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -58,7 +56,7 @@
     "_SpikingPool2dWithV",
     "_SemiFoldedModule",
     "_LinearBase",
-    "SemiFoldedStreamAttr",
+    "SemiFoldedDataFlowFormat",
 ]
 
 
@@ -161,41 +159,21 @@ class _DelayChainANN(_DelayChainBase):
     pass
 
 
-@dataclass(frozen=True)
-class SemiFoldedStreamAttr:
-    """Details of transmission of valid data in semi-folded form data stream."""
-
-    t_1st_vld: int
-    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
-    interval: int
-    """The interval of the output data stream."""
-    n_data: int = 0
-    """The number of valid output data."""
-
-    def t_at(self, n: int) -> int:
-        """The time of the n-th valid data."""
-        if self.n_data > 0:
-            assert 1 <= n <= self.n_data
-
-        return self.t_1st_vld + (n - 1) * self.interval
-
-    @property
-    def t_last_vld(self) -> int:
-        """The time of the last valid data."""
-        assert self.n_data > 0
-        return self.t_at(self.n_data)
+class SemiFoldedDataFlowFormat(DataFlowFormat):
+    pass
 
 
 @set_rt_mode_ann()
 class _SemiFoldedModule(FunctionalModule):
     """Functional modules with interfaces in semi-folded form. Use `build()` of class `HasSemiFoldedIntf`."""
 
-    ostream_attr: SemiFoldedStreamAttr
+    inherent_delay = 1
+    oflow_format: SemiFoldedDataFlowFormat
 
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         raise NotImplementedError
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 37aa6fc9..e3701a7f 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -905,22 +905,23 @@ class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
-        self.ostream_attr = incoming_stream_attr
-        twe = 1 + self.ostream_attr.t_last_vld
+        # For semi-folded linear, the valid output is at only one timestep.
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_last_vld, 1, 1
+        )
+        twe = 1 + self.oflow_format.t_last_vld
 
         ich, ih = self.source[0].shape_out
 
-        if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(ich, ih, ih, incoming_stream_attr.interval)
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
 
-        n_fc = ANNNeuron(
+        n_linear = ANNNeuron(
             self.shape_out,
             self.bias,
             self.bit_trunc,
@@ -930,13 +931,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_linear.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
 
         for i in range(ih):
             neuron = ANNBypassNeuron(
                 shape=(ich, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -954,15 +958,15 @@ def build(
             w = self.weights[ih - i - 1 :: ih, :]
             syn2 = FullConnSyn(
                 neuron,
-                n_fc,
+                n_linear,
                 weights=w,
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
             s_weight.append(syn2)
 
-        generated = [n_fc, *n_delays, *s_delays, *s_weight]
-        self._rebuild_out_intf(network, n_fc, *generated, **build_options)
+        generated = [n_linear, *n_delays, *s_delays, *s_weight]
+        self._rebuild_out_intf(network, n_linear, *generated, **build_options)
 
         return generated
 
@@ -1008,9 +1012,11 @@ def __init__(
         # XXX Do not consider the case when the shape of source neurons needs to be changed, for now.
         # neuron_s.shape_change((in_ch, in_h))
 
-        cout, cin, kh, _ = kernel.shape
+        cout, cin, kh, kw = kernel.shape
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
 
+        assert self.padding[0] < kh and self.padding[1] < kw
+
         if in_ch != cin:
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
@@ -1034,7 +1040,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1047,14 +1053,15 @@ def build(
         _, cin, _, kw = self.kernel.shape
         _, ow = self.shape_out
 
-        self.ostream_attr = SemiFoldedStreamAttr(
-            incoming_stream_attr.t_at(kw - self.padding[0]),
-            incoming_stream_attr.interval * self.stride[1],
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_at_n(kw - self.padding[0]),
+            incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.ostream_attr.t_last_vld
+        twe = 1 + self.oflow_format.t_last_vld
+
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
 
         n_delays = NodeList()
         n_neg_padding = NodeList()
@@ -1072,12 +1079,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_conv2d.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
+
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
@@ -1105,13 +1116,13 @@ def build(
         # Add additional negative padding layer to eliminate the incorrect output
         # NOTE: `t_1st_vld` = 0 & `padding[0]` > 0 means the previous layer is
         # an input node. No need to add negative padding layer for this case.
-        if incoming_stream_attr.t_1st_vld > 0:
+        if incoming_flow_format.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, ih),
-                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
+                    delay=1 + incoming_flow_format.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=incoming_stream_attr.t_1st_vld,
+                    tick_wait_end=incoming_flow_format.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1196,7 +1207,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1209,20 +1220,20 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.ostream_attr = SemiFoldedStreamAttr(
-            incoming_stream_attr.t_at(kw),
-            incoming_stream_attr.interval * self.stride[1],
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_at_n(kw),
+            incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.ostream_attr.t_last_vld
+        twe = 1 + self.oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
 
         n_delays = NodeList()
         s_delays = NodeList()
 
-        pool2d = ANNNeuron(
+        n_pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
@@ -1231,13 +1242,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_pool2d.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
 
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1253,7 +1267,7 @@ def build(
             s_delays.append(syn1)
             syn2 = MaxPoolSyn(
                 neuron,
-                pool2d,
+                n_pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
                     cin, ih, ow, kh, self.stride, (0, 0)
                 ),
@@ -1261,8 +1275,8 @@ def build(
             )
             s_delays.append(syn2)
 
-        generated = [pool2d, *n_delays, *s_delays]
-        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
+        generated = [n_pool2d, *n_delays, *s_delays]
+        self._rebuild_out_intf(network, n_pool2d, *generated, **build_options)
 
         return generated
 
@@ -1302,6 +1316,8 @@ def __init__(
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
         out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
+        kh, kw = self.kernel_size
+        assert self.padding[0] < kh and self.padding[1] < kw
 
         super().__init__(
             neuron_s,
@@ -1314,7 +1330,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_stream_attr: SemiFoldedStreamAttr,
+        incoming_flow_format: SemiFoldedDataFlowFormat,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1327,15 +1343,15 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.ostream_attr = SemiFoldedStreamAttr(
-            incoming_stream_attr.t_at(kw - self.padding[0]),
-            incoming_stream_attr.interval * self.stride[1],
+        self.oflow_format = SemiFoldedDataFlowFormat(
+            incoming_flow_format.t_at_n(kw - self.padding[0]),
+            incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.ostream_attr.t_last_vld
+        twe = 1 + self.oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
 
         # NOTE: Division is achieved with the help of output truncation.
         # TODO Since division with a divisor that is an integer power of 2 can only be implemented by
@@ -1355,7 +1371,7 @@ def build(
         s_delays = NodeList()
         s_neg_padding = NodeList()
 
-        pool2d = ANNNeuron(
+        n_pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             bit_trunc=bit_trunc,
@@ -1364,12 +1380,16 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
+        n_pool2d.set_oflow_format(
+            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+        )
+
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_stream_attr.interval * i + 1,
+                delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_stream_attr.interval * i,
+                tick_wait_end=twe - incoming_flow_format.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1385,7 +1405,7 @@ def build(
             s_delays.append(syn1)
             syn2 = FullConnSyn(
                 neuron,
-                pool2d,
+                n_pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
                     cin, ih, ow, kh, self.stride, self.padding
                 ),
@@ -1395,13 +1415,13 @@ def build(
             s_delays.append(syn2)
 
         # Add additional negative padding layer to eliminate the incorrect output
-        if incoming_stream_attr.t_1st_vld > 0:
+        if incoming_flow_format.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, ih),
-                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
+                    delay=1 + incoming_flow_format.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=incoming_stream_attr.t_1st_vld,
+                    tick_wait_end=incoming_flow_format.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1418,7 +1438,7 @@ def build(
 
                 syn2 = FullConnSyn(
                     neuron,
-                    pool2d,
+                    n_pool2d,
                     weights=-_poo2d_semifolded_mapping_mask(
                         cin, ih, ow, kh, self.stride, self.padding
                     ),
@@ -1427,8 +1447,8 @@ def build(
                 )
                 s_neg_padding.append(syn2)
 
-        generated = [pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
-        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
+        generated = [n_pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
+        self._rebuild_out_intf(network, n_pool2d, *generated, **build_options)
 
         return generated
 
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index a35fa8b5..7e226a41 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -20,7 +20,7 @@
     get_core_mode,
 )
 
-from paibox.base import NeuDyn
+from paibox.base import DataFlowFormat, NeuDyn, INFINITE_DATAFLOW
 from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
@@ -487,11 +487,13 @@ def __init__(
             ),
         )
 
-        """Auxiliary internal stateful attributes for debugging"""
+        """Non-stateful attributes."""
         self._delay = arg_check_pos(delay, "'delay'")
         self._tws = arg_check_non_neg(tick_wait_start, "'tick_wait_start'")
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
         self._uf = arg_check_pos(unrolling_factor, "'unrolling_factor'")
+        # Default dataflow is infinite and continuous, starting at `tws`.
+        self.oflow_format = DataFlowFormat(self.tick_wait_start)
 
     def __len__(self) -> int:
         return self._n_neuron
@@ -529,6 +531,46 @@ def update(
     def reset_state(self, *args, **kwargs) -> None:
         self.reset_memory()  # Call reset of `StatusMemory`.
 
+    def set_oflow_format(
+        self,
+        t_1st_vld: Optional[int] = None,
+        interval: Optional[int] = None,
+        n_vld: Optional[int] = None
+    ) -> None:
+        assert hasattr(self, "oflow_format")
+        _t_1st_vld = (
+            t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
+        )
+
+        _interval = (
+            arg_check_pos(interval, "interval")
+            if isinstance(interval, int)
+            else self.oflow_format.interval
+        )
+
+        _n_vld = (
+            arg_check_non_neg(n_vld, "n_vld")
+            if isinstance(n_vld, int)
+            else self.oflow_format.n_vld
+        )
+
+        if _t_1st_vld < self.tick_wait_start:
+            raise ValueError(
+                f"the output time of the first valid data should be greater than or equal to "
+                f"{self.tick_wait_start}, but got {_t_1st_vld}."
+            )
+
+        if _n_vld > INFINITE_DATAFLOW:
+            if (t_last_vld := _t_1st_vld + (_n_vld - 1) * _interval) > self.end_tick:
+                raise ValueError(
+                    f"valid data is output after the end time. The neuron stops working at "
+                    f"{self.end_tick}, but still needs to output at {t_last_vld}."
+                )
+
+        self.oflow_format.t_1st_vld = _t_1st_vld
+        self.oflow_format.interval = _interval
+        self.oflow_format.n_vld = _n_vld
+
     def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
         return self.__deepcopy__()
diff --git a/paibox/network.py b/paibox/network.py
index 4f0a4c12..6db83afd 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -7,7 +7,7 @@
 from .base import DynamicSys, SynSys
 from .collector import Collector
 from .components import NeuModule, Neuron, Projection
-from .components._modules import SemiFoldedStreamAttr, _SemiFoldedModule
+from .components._modules import SemiFoldedDataFlowFormat, _SemiFoldedModule
 from .components.modules import BuiltComponentType
 from .exceptions import NotSupportedError
 from .mixin import Container
@@ -102,18 +102,18 @@ def build_modules(
 
         generated = dict()
 
-        # For external input stream info:
-        # 1. The start time is 1
-        # 2. The interval is 1
-        # 3. The #N of data is -1 since it dosen't effect the subsequent output stream.
+        # For external input dataflow:
+        # 1. The start time is 0.
+        # 2. The interval is 1.
+        # 3. The #N of data is `INFINITE_DATAFLOW` since it doesn't affect the subsequent output dataflow.
         # TODO Reserve an interface for setting the properties of external input from `FRONTEND_ENV`?
-        last_vld_output_attr = SemiFoldedStreamAttr(0, 1)
+        last_vld_output_attr = SemiFoldedDataFlowFormat(t_1st_vld=0)
 
         for m in modules:
             # TODO for the case of the ResBlock, the `pred_dg_semi_ops` will be used.
             if isinstance(m, _SemiFoldedModule):
                 generated[m] = m.build(self, last_vld_output_attr, **build_options)
-                last_vld_output_attr = m.ostream_attr
+                last_vld_output_attr = m.oflow_format
             else:
                 generated[m] = m.build(self, **build_options)
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index c2c63a82..56c80471 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -4,6 +4,7 @@
 import paibox as pb
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
+from paibox.components._modules import _SemiFoldedModule
 from paibox.components.neuron.base import MetaNeuron
 from paibox.components.synapses.conv_utils import _conv2d_faster, _pair, _single
 from paibox.network import DynSysGroup
@@ -976,12 +977,12 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim1.add_probe(probe_linear)
 
-        semi_folded_modules = [*conv2d_list, linear]
+        semi_folded_modules: list[_SemiFoldedModule] = [*conv2d_list, linear]
         # The interval & the time o the first valid data of the external input data stream
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
@@ -1049,7 +1050,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
+                    linear.tick_wait_start + linear.oflow_format.t_last_vld
                 ],
             )
 
@@ -1171,12 +1172,12 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim1.add_probe(probe_linear)
 
-        semi_folded_modules = [*pool2d_list, linear]
+        semi_folded_modules: list[_SemiFoldedModule] = [*pool2d_list, linear]
         # The interval & the time o the first valid data of the external input data stream
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
@@ -1231,7 +1232,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
+                    linear.tick_wait_start + linear.oflow_format.t_last_vld
                 ],
             )
 

From 865c8be6bf3d43a1323b5051eff02dabc365e454 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 14:19:58 +0800
Subject: [PATCH 160/187] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20support=20py3.13?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/codecov.yml | 2 +-
 pyproject.toml                | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index 05f9ce7b..35d36c57 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -22,7 +22,7 @@ jobs:
   pytest:
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         os: [ubuntu-latest, windows-latest]
     runs-on: ${{ matrix.os }}
 
diff --git a/pyproject.toml b/pyproject.toml
index 71124dab..08223a17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ homepage = "https://github.com/PAICookers/PAIBox"
 documentation = "https://github.com/PAICookers/PAIBox#readme"
 keywords = ["PAICORE 2.0", "PAIBox", "SNN", "Toolchain"]
 classifiers = [
+    "Development Status :: 4 - Beta",
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
     "Operating System :: OS Independent",
@@ -24,9 +25,9 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    "Topic :: Software Development :: Build Tools",
-    "Topic :: Software Development :: Libraries",
+    "Topic :: Software Development :: Compilers",
 ]
 packages = [{ include = "paibox" }]
 

From 5c1ef57826ff6ac41de9545b7146e7b7f9c60cd2 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 14:22:29 +0800
Subject: [PATCH 161/187] =?UTF-8?q?=F0=9F=97=91=EF=B8=8F=20removed=20an=20?=
 =?UTF-8?q?always=20failed=20pre-commit=20hook?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .pre-commit-config.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 490b6d15..b5b4b101 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,6 @@ repos:
       - id: check-symlinks
       - id: check-merge-conflict
       - id: mixed-line-ending
-      - id: name-tests-test
         args: [--pytest-test-first]
       - id: requirements-txt-fixer
       - id: pretty-format-json

From 468efa687780f23c0f0dfec6f710163d6a7fc7fe Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 15:42:47 +0800
Subject: [PATCH 162/187] =?UTF-8?q?=E2=9C=A8=20feat(base):=20update=20arg?=
 =?UTF-8?q?=20check=20for=20`DataFlowFormat`=20&=20function=20`set=5Foflow?=
 =?UTF-8?q?=5Fformat`=20in=20neuron.=20Add=20test=20cases?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/base.py                   | 54 ++++++++++++++++++++-----
 paibox/components/neuron/base.py | 69 +++++++++++++++++---------------
 tests/test_base.py               | 19 ++++++++-
 3 files changed, 97 insertions(+), 45 deletions(-)

diff --git a/paibox/base.py b/paibox/base.py
index 89ffdbd0..c47260ff 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -257,26 +257,23 @@ def state(self) -> NodeDict:
         return self._memories
 
 
-INFINITE_DATAFLOW = 0  # the dataflow is infinite.
+INFINITE_DATAFLOW = 0
 
 
 @dataclass
 class DataFlowFormat:
     """Describe in detail the format of valid data in the dataflow."""
 
-    t_1st_vld: int
-    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
+    t_1st_vld: int = 0
+    """Global time or a relative time of the first valid data in the dataflow, determined by `is_local_time`."""
     interval: int = 1
-    """The interval of valid data in the flow."""
+    """The interval of valid data in the dataflow."""
     n_vld: int = INFINITE_DATAFLOW
-    """The number of valid data. 0 for infinite dataflow."""
+    """The number of valid data. <=0 for infinite dataflow."""
 
-    def __post_init__(self) -> None:
-        if self.n_vld < INFINITE_DATAFLOW:
-            raise ValueError(
-                f"'n_vld' should be greater than or equal to {INFINITE_DATAFLOW}, "
-                f"but got {self.n_vld}."
-            )
+    is_local_time: bool = True
+    """Whether the `t_1st_vld` is relative to the local time(tws+T) of the neuron, or   \
+        relative to the global time of the external input."""
 
     def t_at_idx(self, idx: int) -> int:
         """The time of the valid data at the given index."""
@@ -295,6 +292,41 @@ def t_last_vld(self) -> int:
         assert self.n_vld > INFINITE_DATAFLOW
         return self.t_at_n(self.n_vld)
 
+    def get_global_t_1st_vld(self, tws: int) -> int:
+        """Get the global time of the first valid data."""
+        return tws + self.t_1st_vld if self.is_local_time else self.t_1st_vld
+
+    def _check_after_assign(self, tws: int, end_tick: int) -> None:
+        _t_1st_vld_out_of_range_text = (
+            "the {0} output time of the first valid data should be in the working "
+            + "time from {1} to {2}, but got {3}."
+        )
+
+        # The global time of the first valid data is in [tws, end_tick].
+        gb_t_1st_vld = self.get_global_t_1st_vld(tws)
+        if gb_t_1st_vld < tws or gb_t_1st_vld > end_tick:
+            if self.is_local_time:
+                raise ValueError(
+                    _t_1st_vld_out_of_range_text.format(
+                        "local", "+0", f"+{end_tick - tws + 1}", self.t_1st_vld
+                    )
+                )
+            else:
+                raise ValueError(
+                    _t_1st_vld_out_of_range_text.format(
+                        "global", tws, end_tick, self.t_1st_vld
+                    )
+                )
+
+        if self.n_vld > INFINITE_DATAFLOW:
+            if (
+                t_last_vld := gb_t_1st_vld + (self.n_vld - 1) * self.interval
+            ) > end_tick:
+                raise ValueError(
+                    f"valid data is output after the end time. The neuron stops working at "
+                    f"{end_tick}, but still needs to output at {t_last_vld}."
+                )
+
 
 class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
 
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 7e226a41..14def12f 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -20,7 +20,7 @@
     get_core_mode,
 )
 
-from paibox.base import DataFlowFormat, NeuDyn, INFINITE_DATAFLOW
+from paibox.base import DataFlowFormat, NeuDyn
 from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
@@ -492,8 +492,8 @@ def __init__(
         self._tws = arg_check_non_neg(tick_wait_start, "'tick_wait_start'")
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
         self._uf = arg_check_pos(unrolling_factor, "'unrolling_factor'")
-        # Default dataflow is infinite and continuous, starting at `tws`.
-        self.oflow_format = DataFlowFormat(self.tick_wait_start)
+        # Default dataflow is infinite and continuous, starting at tws+0.
+        self.oflow_format = DataFlowFormat(0, is_local_time=True)
 
     def __len__(self) -> int:
         return self._n_neuron
@@ -535,41 +535,44 @@ def set_oflow_format(
         self,
         t_1st_vld: Optional[int] = None,
         interval: Optional[int] = None,
-        n_vld: Optional[int] = None
+        n_vld: Optional[int] = None,
+        *,
+        format_type: type[DataFlowFormat] = DataFlowFormat,
     ) -> None:
-        assert hasattr(self, "oflow_format")
-        _t_1st_vld = (
-            t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
-        )
-
-        _interval = (
-            arg_check_pos(interval, "interval")
-            if isinstance(interval, int)
-            else self.oflow_format.interval
-        )
-
-        _n_vld = (
-            arg_check_non_neg(n_vld, "n_vld")
-            if isinstance(n_vld, int)
-            else self.oflow_format.n_vld
-        )
-
-        if _t_1st_vld < self.tick_wait_start:
-            raise ValueError(
-                f"the output time of the first valid data should be greater than or equal to "
-                f"{self.tick_wait_start}, but got {_t_1st_vld}."
+        """Set the attributes of output dataflow format by given arguments."""
+        if hasattr(self, "oflow_format"):
+            _t_1st_vld = (
+                t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
             )
-
-        if _n_vld > INFINITE_DATAFLOW:
-            if (t_last_vld := _t_1st_vld + (_n_vld - 1) * _interval) > self.end_tick:
+            _interval = (
+                arg_check_pos(interval, "interval")
+                if isinstance(interval, int)
+                else self.oflow_format.interval
+            )
+            _n_vld = (
+                arg_check_non_neg(n_vld, "n_vld")
+                if isinstance(n_vld, int)
+                else self.oflow_format.n_vld
+            )
+            self._assign_flow_format(_t_1st_vld, _interval, _n_vld)
+        else:
+            if not (
+                isinstance(interval, int)
+                and isinstance(n_vld, int)
+                and isinstance(t_1st_vld, int)
+            ):
                 raise ValueError(
-                    f"valid data is output after the end time. The neuron stops working at "
-                    f"{self.end_tick}, but still needs to output at {t_last_vld}."
+                    "if 'oflow_format' is not set, 't_1st_vld', 'interval' & 'n_vld' must be set."
                 )
 
-        self.oflow_format.t_1st_vld = _t_1st_vld
-        self.oflow_format.interval = _interval
-        self.oflow_format.n_vld = _n_vld
+            self.oflow_format = format_type(t_1st_vld, interval, n_vld)
+            self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
+
+    def _assign_flow_format(self, t_1st_vld: int, intv: int, n_vld: int) -> None:
+        self.oflow_format.t_1st_vld = t_1st_vld
+        self.oflow_format.interval = intv
+        self.oflow_format.n_vld = n_vld
+        self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
 
     def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
diff --git a/tests/test_base.py b/tests/test_base.py
index 5adb2c7f..cd55346d 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1,7 +1,7 @@
 import pytest
 
 import paibox as pb
-from paibox.base import PAIBoxObject
+from paibox.base import PAIBoxObject, DataFlowFormat
 from paibox.exceptions import RegisterError
 
 
@@ -41,3 +41,20 @@ def test_paiboxobject_nodes():
 
     nodes4 = obj1.nodes(method="absolute", level=-1, include_self=True)
     assert nodes4["obj111"] == obj1
+
+
+class TestDataFlowFormat:
+    def test_dff_infinite_dataflow(self):
+        with pytest.raises((AssertionError, ValueError)):
+            dff = DataFlowFormat(1, 0, -1)
+            _ = dff.t_last_vld
+
+    def test_dff_valid(self):
+        # 1. t1 >= tws, t_last > endtick
+        dff1 = DataFlowFormat(10, 3, 10, is_local_time=False)
+        with pytest.raises(ValueError):
+            dff1._check_after_assign(8, 36)
+
+        # 2. t1 >= tws, t_last <= endtick
+        dff2 = DataFlowFormat(10, 3, 10, is_local_time=True)
+        dff2._check_after_assign(2, 39)

From 9eb94048849bba423a554bd7df93341e54cfe946 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 15:48:51 +0800
Subject: [PATCH 163/187] =?UTF-8?q?=E2=9C=A8=20feat(graphs):=20corrected?=
 =?UTF-8?q?=20the=20calculation=20method=20of=20attribute=20`inherent=5Fti?=
 =?UTF-8?q?mestep`.=20Strictly=20annotate=20the=20data=20flow=20format=20f?=
 =?UTF-8?q?or=20the=20computational=20neuron=20of=20the=20semi-folded=20op?=
 =?UTF-8?q?s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py            | 20 +++++++++++++-------
 paibox/components/functional.py     | 16 ++++++++++++----
 tests/components/test_functional.py | 29 +++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index c3e2751d..0f1f68ad 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -127,7 +127,7 @@ def _pre_build(self, **build_options) -> None:
                 # checks. These additional checks may be removed as more network structures will be supported.
 
                 # Currently, `LinearSemiFolded` is at the end of the network, since it will change the form of
-                # the input data stream, and its effective output is at the same time.
+                # the input dataflow, and its effective output is at the same time.
                 semi_linears = modules.subset(LinearSemiFolded)
                 if not all(
                     len(succ_dg_semi_ops[linear]) == 0 for linear in semi_linears
@@ -172,9 +172,14 @@ def _update_graph(self, **build_options) -> None:
         self.inodes = self._raw_nodes.subset(InputProj)
 
         # By default, nodes with out-degree = 0 are considered as output nodes.
-        self.onodes = self._raw_nodes.key_on_condition(
-            lambda node: self.degree_of_nodes[node].out_degree == 0
-        )  # type: ignore
+        # TODO A node with out-degree > 0 can also be an output node. However, no network for now has this topology.
+        self.onodes = Collector(
+            {
+                k: cast(DestNodeType, v)
+                for k, v in self._raw_nodes.items()
+                if self.degree_of_nodes[k].out_degree == 0
+            }
+        ).not_subset(InputProj)
 
         for name, node in self._raw_nodes.items():
             self.nodes[name] = NodeAttr(
@@ -525,9 +530,10 @@ def _find_rg_by_cb(
     @property
     def inherent_timestep(self) -> int:
         self.build_check()
-        _, distance = get_longest_path(self.succ_dg, self.ordered_nodes)
-
-        return distance
+        return max(
+            n.oflow_format.get_global_t_1st_vld(n.tick_wait_start)
+            for n in self.onodes.values()
+        )
 
     @property
     def graph_name_repr(self) -> str:
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index e3701a7f..be7897c9 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -932,7 +932,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_linear.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(ih):
@@ -1080,7 +1082,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_conv2d.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(kw):
@@ -1243,7 +1247,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_pool2d.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(kw):
@@ -1381,7 +1387,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_pool2d.set_oflow_format(
-            interval=self.oflow_format.interval, n_vld=self.oflow_format.n_vld
+            self.oflow_format.t_1st_vld,
+            self.oflow_format.interval,
+            self.oflow_format.n_vld,
         )
 
         for i in range(kw):
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 56c80471..4aae8961 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -1043,6 +1043,16 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                         ],
                     )
 
+                    assert conv2d_list[i_conv].tick_wait_start + t_1st_vld_data[
+                        i_conv
+                    ] + i * semi_vld_out_intv[i_conv] - 1 == conv2d_list[
+                        i_conv
+                    ].tick_wait_start + conv2d_list[
+                        i_conv
+                    ].oflow_format.t_at_idx(
+                        i
+                    )
+
             # x is the reference result of the last convolution.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
@@ -1053,6 +1063,10 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                     linear.tick_wait_start + linear.oflow_format.t_last_vld
                 ],
             )
+            assert (
+                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
+                == linear.tick_wait_start + linear.oflow_format.t_last_vld
+            )
 
     @pytest.mark.parametrize(
         "ishape_chw, n_pool, kshape_hw, stride, padding, out_features, pool_type",
@@ -1225,6 +1239,16 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                         ],
                     )
 
+                    assert pool2d_list[i_pool].tick_wait_start + t_1st_vld_data[
+                        i_pool
+                    ] + i * semi_vld_out_intv[i_pool] - 1 == pool2d_list[
+                        i_pool
+                    ].tick_wait_start + pool2d_list[
+                        i_pool
+                    ].oflow_format.t_at_idx(
+                        i
+                    )
+
             # x is the reference result of the last pooling.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
@@ -1236,6 +1260,11 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                 ],
             )
 
+            assert (
+                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
+                == linear.tick_wait_start + linear.oflow_format.t_last_vld
+            )
+
     @pytest.mark.parametrize(
         "shape, weight",
         [

From f7733804e4386a02bb98cf7ce8f68ae8f5777b2b Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 15:50:20 +0800
Subject: [PATCH 164/187] =?UTF-8?q?=F0=9F=8F=B7=EF=B8=8F=20typing(neuron):?=
 =?UTF-8?q?=20add=20typed=20dict=20`ExtraNeuAttrKwds`=20to=20check=20extra?=
 =?UTF-8?q?=20keywords=20passing=20to=20neurons?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/modules.py        |  4 ++--
 paibox/components/neuron/base.py    | 10 +++++-----
 paibox/components/neuron/neurons.py | 25 +++++++++++++++----------
 paibox/components/neuron/utils.py   | 24 ++++++++++++++++++++++--
 4 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 41e06fdc..2c13edef 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -14,7 +14,7 @@
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import _input_width_format, _RTModeKwds, _spike_width_format
+from .neuron.utils import _input_width_format, RTModeKwds, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
@@ -91,7 +91,7 @@ class NeuModule(NeuDyn, BuildingModule):
     """#N of outputs."""
     inherent_delay: int = 0
     """Internal delay of the module, relative to the external."""
-    rt_mode_kwds: _RTModeKwds
+    rt_mode_kwds: RTModeKwds
     mode: CoreMode
 
     def __init__(
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 14def12f..bef9976b 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -21,7 +21,7 @@
 )
 
 from paibox.base import DataFlowFormat, NeuDyn
-from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
+from paibox.exceptions import ConfigInvalidError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
@@ -44,7 +44,7 @@
     _input_width_format,
     _leak_v_check,
     _mask,
-    _RTModeKwds,
+    RTModeKwds,
     _spike_width_format,
     vjt_overflow,
 )
@@ -57,7 +57,7 @@
 class MetaNeuron:
     """Meta neuron"""
 
-    rt_mode_kwds: _RTModeKwds
+    rt_mode_kwds: RTModeKwds
     mode: CoreMode
 
     def __init__(
@@ -96,8 +96,8 @@ def __init__(
         # check whether the mode is valid
         self.mode = get_core_mode(input_width, spike_width, snn_en)
 
-        if pool_max == True and self.mode != CoreMode.MODE_ANN:
-            raise NotSupportedError(
+        if pool_max and self.mode != CoreMode.MODE_ANN:
+            raise ConfigInvalidError(
                 f"max pooling is only supported in {CoreMode.MODE_ANN.name}, "
                 f"but got {self.mode.name}."
             )
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 85e3df47..3f7f0279 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -8,10 +8,15 @@
 from paibox.types import LEAK_V_DTYPE, DataType, Shape
 
 from .base import Neuron
-from .utils import LEAK_V_MAX
+from .utils import LEAK_V_MAX, ExtraNeuAttrKwds
+
+if sys.version_info >= (3, 12):
+    from typing import Unpack
+else:
+    from typing_extensions import Unpack
 
 if sys.version_info >= (3, 13):
-    from typing import deprecated
+    from warnings import deprecated
 else:
     from typing_extensions import deprecated
 
@@ -37,7 +42,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """IF neuron.
 
@@ -93,7 +98,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """LIF neuron.
 
@@ -152,7 +157,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """Tonic spiking neuron.
 
@@ -178,7 +183,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """Phasic spiking neuron. Once the neuron receives `N` spikes and fires, it will reset to   \
             the negative floor and never fires again. `N` is `fire_step`.
@@ -213,7 +218,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """A neuron that always outputs 1 as long as it starts working.
 
@@ -245,7 +250,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """Bypass neuron. Output is equal to input.
 
@@ -279,7 +284,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         """General neuron used in ANN mode. Positive threshold = 1, negative threshold = 0."""
         kwargs["bit_truncation"] = bit_trunc
@@ -299,7 +304,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs,
+        **kwargs: Unpack[ExtraNeuAttrKwds],
     ) -> None:
         super().__init__(
             shape, bias=0, bit_trunc=8, keep_shape=keep_shape, name=name, **kwargs
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index 1d9ea2ff..dd58006f 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -2,7 +2,12 @@
 from typing import Literal, TypedDict, Union
 
 import numpy as np
-from paicorelib import InputWidthFormat, SNNModeEnable, SpikeWidthFormat
+from paicorelib import (
+    InputWidthFormat,
+    SNNModeEnable,
+    SpikeWidthFormat,
+    MaxPoolingEnable,
+)
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
     BIT_TRUNCATE_MAX,
@@ -107,9 +112,24 @@ def _get_neu_out_dtype(
         return NEUOUT_U8_DTYPE
 
 
-class _RTModeKwds(TypedDict):
+class RTModeKwds(TypedDict):
     """A typed keywords for runtime mode. Only for checking if necessary."""
 
     input_width: InputWidthFormat
     spike_width: SpikeWidthFormat
     snn_en: SNNModeEnable
+
+
+class ExtraNeuAttrKwds(TypedDict, total=False):
+    """A typed keywords for extra neuron attributes."""
+
+    bit_truncation: int  # For ANNNeuron
+    delay: int
+    tick_wait_start: int
+    tick_wait_end: int
+    input_width: Union[L[1, 8], InputWidthFormat]
+    spike_width: Union[L[1, 8], SpikeWidthFormat]
+    snn_en: Union[bool, SNNModeEnable]
+    pool_max: Union[bool, MaxPoolingEnable]
+    unrolling_factor: int
+    overflow_strict: bool

From 3ddc9fb32162c2869f51b0455802a9ea0ce27a91 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 16:05:46 +0800
Subject: [PATCH 165/187] =?UTF-8?q?=F0=9F=90=9B=20bugfix(compile):=20if=20?=
 =?UTF-8?q?compiling=20with=20`core=5Festimate=5Fonly`=20turned=20on,=20pr?=
 =?UTF-8?q?event=20exporting=20the=20compiled=20results?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/mapper.py | 19 ++++++++++++++++---
 paibox/exceptions.py     |  6 ++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index 6012d7fc..b91e2814 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -8,7 +8,7 @@
 
 from paibox.base import SynSys
 from paibox.components import Neuron
-from paibox.exceptions import ConfigInvalidError, ResourceError
+from paibox.exceptions import CompileError, ConfigInvalidError, ResourceError
 from paibox.network import DynSysGroup
 
 from .conf_exporting import *
@@ -71,6 +71,9 @@ def __init__(self) -> None:
             chip_list=_BACKEND_CONTEXT["target_chip_addr"]
         )
 
+        self._core_estimate_only = False
+        """Whether this compilation is for core estimation only. If so, no core will be assigned."""
+
         self.clear()
 
     def clear(self) -> None:
@@ -90,6 +93,8 @@ def clear(self) -> None:
         self.n_core_required = 0
         self.n_core_occupied = 0
 
+        self._core_estimate_only = False
+
         # Set default cflags
         _BACKEND_CONTEXT.cflags.clear()
         set_cflag(enable_wp_opt=True)
@@ -169,6 +174,8 @@ def compile(
             set_cflag(multicast_optim=True)
             set_cflag(multicast_optim_nodes=_mul_optim_nodes)
 
+        self._core_estimate_only = core_estimate_only
+
         """Preperation.
             1. Check whether the PAIGraph has built.
             2. Set global compilation flags.
@@ -192,9 +199,9 @@ def compile(
         self.cb_axon_grouping()
 
         """Core coordinate assignment."""
-        self.coord_assign(core_estimate_only)
+        self.coord_assign(self._core_estimate_only)
 
-        if core_estimate_only:
+        if self._core_estimate_only:
             return GraphInfo(
                 name=self.graph.graph_name_repr,
                 input={},
@@ -619,6 +626,12 @@ def export(
 
         Return: total configurations in dictionary format.
         """
+        if self._core_estimate_only:
+            raise CompileError(
+                "the current compilation is only for core estimation. "
+                "Please disable 'core_estimate_only' and compile again before exporting."
+            )
+
         if format not in ("bin", "npy", "txt"):
             raise ValueError(f"format {format} is not supported.")
 
diff --git a/paibox/exceptions.py b/paibox/exceptions.py
index 55514a43..bcc4d32a 100644
--- a/paibox/exceptions.py
+++ b/paibox/exceptions.py
@@ -74,6 +74,12 @@ class FunctionalError(PAIBoxError, RuntimeError):
     pass
 
 
+class CompileError(PAIBoxError, RuntimeError):
+    """Exception for compilation."""
+
+    pass
+
+
 class RoutingError(PAIBoxError):
     """Exception for routing tree."""
 

From 51e7aa06fa3fccb5bc624581f37eceef36f8b8dc Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 2 Dec 2024 16:16:14 +0800
Subject: [PATCH 166/187] =?UTF-8?q?=F0=9F=9A=B8=20typing:=20update=20typin?=
 =?UTF-8?q?g=20&=20error=20handling.=20Removed=20useless=20types?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py    | 66 +++++++++++++++++++------------------
 paibox/backend/mapper.py    | 27 +++++++--------
 paibox/backend/placement.py |  8 +++--
 paibox/backend/routing.py   |  9 ++---
 paibox/backend/types.py     | 13 +++-----
 paibox/exceptions.py        |  8 ++++-
 paibox/utils.py             | 16 ++++-----
 7 files changed, 74 insertions(+), 73 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 0f1f68ad..69a117e7 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -2,14 +2,18 @@
 from collections import defaultdict
 from collections.abc import Iterable, Mapping, Sequence
 from dataclasses import dataclass, field
-from typing import Any, TypeVar, Union
+from typing import Any, TypeVar, Union, cast
 
 from paicorelib import HwConfig
 
 from paibox.collector import Collector
 from paibox.components import FullConnectedSyn, InputProj, NeuModule, Neuron
 from paibox.components.functional import LinearSemiFolded
-from paibox.exceptions import GraphBuildError, GraphConnectionError, NotSupportedError
+from paibox.exceptions import (
+    GraphBuildError,
+    GraphConnectionError,
+    GraphNotSupportedError,
+)
 from paibox.network import DynSysGroup
 from paibox.utils import check_elem_unique
 
@@ -132,7 +136,7 @@ def _pre_build(self, **build_options) -> None:
                 if not all(
                     len(succ_dg_semi_ops[linear]) == 0 for linear in semi_linears
                 ):
-                    raise NotSupportedError(
+                    raise GraphNotSupportedError(
                         "currently, the semi-folded linear can only be used as output of the network."
                     )
 
@@ -183,9 +187,7 @@ def _update_graph(self, **build_options) -> None:
 
         for name, node in self._raw_nodes.items():
             self.nodes[name] = NodeAttr(
-                node=node,
-                position=self._node_pos(name),
-                degree=self.degree_of_nodes[name],
+                node, self._node_pos(name), self.degree_of_nodes[name]
             )
 
         self.ordered_nodes = toposort(self.succ_dg)
@@ -220,7 +222,7 @@ def topo_support_check(self) -> None:
             onode.num_out > HwConfig.N_FANIN_PER_DENDRITE_MAX
             for onode in self.onodes.values()
         ):
-            raise NotSupportedError(
+            raise GraphNotSupportedError(
                 f"only output nodes with no more than {HwConfig.N_FANIN_PER_DENDRITE_MAX} "
                 f"neurons are supported."
             )
@@ -541,7 +543,7 @@ def graph_name_repr(self) -> str:
         return _prefix + "_and_".join(network.name for network in self._raw_networks)
 
 
-_NT = TypeVar("_NT", CoreBlock, NodeName, RoutingGroup)
+_NT = TypeVar("_NT", CoreBlock, NodeName, RoutingGroup, MergedSuccGroup)
 _T = TypeVar("_T")
 
 
@@ -557,7 +559,7 @@ def _degree_check(
                     if isinstance(succ_node, CoreBlock)
                     else str(succ_node)
                 )
-                raise NotSupportedError(
+                raise GraphNotSupportedError(
                     f"If out-degree of a node is greater than 1, the in-degree of its sucessors must be 1. "
                     f"However, in-degree of {_node_repr} is {degree_of_nodes[succ_node].in_degree}."
                 )
@@ -570,7 +572,7 @@ def find_cycles(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[list[_NT]]:
     stack_set: set[_NT] = set()  # 方便快速检查路径中的节点
 
     # 深度优先搜索的辅助函数
-    def dfs(node: _NT):
+    def dfs(node: _NT) -> None:
         if node in stack_set:  # 检测到环
             cycle_start_index = stack.index(node)
             cycles.append(stack[cycle_start_index:])
@@ -596,45 +598,45 @@ def dfs(node: _NT):
     return cycles
 
 
-def merge_overlap(groups: Iterable[Iterable[_NT]]) -> list[list[_NT]]:
+def merge_overlap(groups: Iterable[Sequence[_NT]]) -> list[list[_NT]]:
     # 并查集数据结构
     parent: dict[_NT, _NT] = dict()
 
     # 查找集合的根节点
-    def find(x):
+    def find(x: _NT) -> _NT:
         if parent[x] != x:
             parent[x] = find(parent[x])
+
         return parent[x]
 
     # 合并两个集合
-    def union(x, y):
-        rootX = find(x)
-        rootY = find(y)
-        if rootX != rootY:
-            parent[rootY] = rootX
+    def union(x, y) -> None:
+        rootx = find(x)
+        rooty = find(y)
+        if rootx != rooty:
+            parent[rooty] = rootx
 
     # 初始化并查集
     for group in groups:
-        for element in group:
-            if element not in parent:
-                parent[element] = element
+        for elem in group:
+            if elem not in parent:
+                parent[elem] = elem
 
     # 合并所有相互重叠的环
     for group in groups:
-        first_element = group[0]
-        for element in group[1:]:
-            union(first_element, element)
+        first_elem = group[0]
+        for elem in group[1:]:
+            union(first_elem, elem)
 
     # 根据并查集结果，将所有节点归类到同一个集合中
-    merged_groups: dict[_NT, list[_NT]] = dict()
-    for element in parent:
-        root = find(element)
-        if root not in merged_groups:
-            merged_groups[root] = []
-        merged_groups[root].append(element)
+    mgrps: dict[_NT, list[_NT]] = dict()
+    for elem in parent:
+        root = find(elem)
+        if root not in mgrps:
+            mgrps[root] = []
+        mgrps[root].append(elem)
 
-    # 将结果转换为列表列表形式
-    return list(merged_groups.values())
+    return list(mgrps.values())
 
 
 def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
@@ -691,7 +693,7 @@ def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
                 vertices.add(m)
 
     if any(incoming_edges.get(v, None) for v in directed_edges):
-        raise NotSupportedError("the graph with cycles is not supported.")
+        raise GraphNotSupportedError("the graph with cycles is not supported.")
 
     return ordered
 
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index b91e2814..e0bfb715 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -45,10 +45,11 @@
 
 
 class Mapper:
-    graph = PAIGraph()
+    graph: PAIGraph
     graph_info: GraphInfo
 
     def __init__(self) -> None:
+        self.graph = PAIGraph()
         self.core_blocks: list[CoreBlock] = []
         """List for core blocks in the network."""
         self.succ_core_blocks: dict[CoreBlock, list[CoreBlock]] = defaultdict(list)
@@ -381,7 +382,7 @@ def config_export(self) -> GraphInfo:
         ]:
             raise ConfigInvalidError(
                 f"the output chip address {ochip_coord} should not overlap with the "
-                f"chip addresses, but got {_BACKEND_CONTEXT._target_chip_addr_repr()}."
+                f"target chip addresses, but got {_BACKEND_CONTEXT._target_chip_addr_repr()}."
             )
 
         input_nodes_info = self._inpproj_config_export()
@@ -704,27 +705,27 @@ def _find_dest_cb_by_nseg(
         return dest_cb_of_nseg
 
 
-def cycle_merge(merged_sgrps: list[MergedSuccGroup]):
-    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = dict()
+def cycle_merge(merged_sgrps: list[MergedSuccGroup]) -> list[MergedSuccGroup]:
+    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = defaultdict(list)
+
     for msgrp in merged_sgrps:
-        succ_merged_sgrps[msgrp] = []
-        nodes = set(msgrp.nodes)
         for _msgrp in merged_sgrps:
             if msgrp == _msgrp:
                 continue
-            if not nodes.isdisjoint(_msgrp.input_nodes):
+            if not msgrp.nodes.isdisjoint(_msgrp.input_nodes):
                 succ_merged_sgrps[msgrp].append(_msgrp)
 
     cycles: list[list[MergedSuccGroup]] = find_cycles(succ_merged_sgrps)
     merged_cycles: list[list[MergedSuccGroup]] = merge_overlap(cycles)
 
     processed_merged_cycles: list[MergedSuccGroup] = list()
-    remaining_merged_sgrps: set[MergedSuccGroup] = set(merged_sgrps)
-    for merged_cycle in merged_cycles:
-        processed_merged_cycles.append(MergedSuccGroup.merge(merged_cycle))
-        for msgrp in merged_cycle:
-            remaining_merged_sgrps.remove(msgrp)
-    processed_merged_cycles.extend(remaining_merged_sgrps)
+    remaining_msgrps: set[MergedSuccGroup] = set(merged_sgrps)
+    for mc in merged_cycles:
+        processed_merged_cycles.append(MergedSuccGroup.merge(mc))
+        for msgrp in mc:
+            remaining_msgrps.remove(msgrp)
+
+    processed_merged_cycles.extend(remaining_msgrps)
     return processed_merged_cycles
 
 
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index e77e0035..3fc370b9 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -1,6 +1,6 @@
 import math
 import warnings
-from typing import ClassVar, Literal, Optional, overload
+from typing import ClassVar, Literal, Optional, cast, overload
 
 import numpy as np
 from paicorelib import LCN_EX, ChipCoord, Coord, CoreMode, HwConfig, MaxPoolingEnable
@@ -177,7 +177,9 @@ def shape(self) -> tuple[int, int]:
     @property
     def source(self) -> list[SourceNodeType]:
         """Ordered unique source nodes."""
-        return list(set([parent.source for parent in self.obj]))
+        return cast(
+            list[SourceNodeType], list(set([parent.source for parent in self.obj]))
+        )
 
     @property
     def axons(self) -> list[SourceNodeType]:
@@ -186,7 +188,7 @@ def axons(self) -> list[SourceNodeType]:
     @property
     def dest(self) -> list[DestNodeType]:
         """Ordered unique destination nodes."""
-        return list(set([parent.dest for parent in self.obj]))
+        return cast(list[DestNodeType], list(set([parent.dest for parent in self.obj])))
 
     def n_axon_of(self, index: int) -> int:
         """Get the #N of axons of `index`-th source neuron."""
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 9154d185..2e854d6d 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -11,19 +11,14 @@
 from paicorelib import RoutingLevel as Level
 from paicorelib.routing_defs import MAX_ROUTING_PATH_LENGTH
 
-from paibox.exceptions import (
-    GraphBuildError,
-    PAIBoxDeprecationWarning,
-    ResourceError,
-    RoutingError,
-)
+from paibox.exceptions import PAIBoxDeprecationWarning, ResourceError, RoutingError
 
 from .conf_types import CorePlmConfInChip
 from .placement import CoreBlock, EmptyCorePlacement
 from .types import *
 
 if sys.version_info >= (3, 13):
-    from typing import deprecated
+    from warnings import deprecated
 else:
     from typing_extensions import deprecated
 
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index 778e3853..b8f34f3b 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -30,7 +30,6 @@
     "NodeDegree",
     "NodeAttr",
     "EdgeAttr",
-    "PartitionedEdges",
     "NeuSlice",
     "NeuSegment",
     "NeuSegOfCorePlm",
@@ -88,23 +87,19 @@ def copy(self) -> "NodeDegree":
         return self.__deepcopy__()
 
 
-class NodeAttr(NamedTuple):
+@dataclass
+class NodeAttr:
     node: NodeType
     position: NodePosition
     degree: NodeDegree
 
 
-class EdgeAttr(NamedTuple):
+@dataclass
+class EdgeAttr:  # TODO FIXME distance?
     edge: EdgeType
     distance: int
 
 
-class PartitionedEdges(NamedTuple):
-    edges: set[EdgeType]
-    rg_id: int
-    rt_mode: CoreMode = CoreMode.MODE_SNN  # XXX Temp solution
-
-
 NeuSlice: TypeAlias = slice
 
 
diff --git a/paibox/exceptions.py b/paibox/exceptions.py
index bcc4d32a..e5204ad0 100644
--- a/paibox/exceptions.py
+++ b/paibox/exceptions.py
@@ -57,7 +57,13 @@ class GraphConnectionError(GraphBuildError):
 
 
 class NotSupportedError(PAIBoxError, NotImplementedError):
-    """Exception for a certain function not supported."""
+    """Exception for unsupported functions."""
+
+    pass
+
+
+class GraphNotSupportedError(GraphBuildError, NotSupportedError):
+    """Eception for unsupported structures of graph."""
 
     pass
 
diff --git a/paibox/utils.py b/paibox/utils.py
index 6c43864b..906933cf 100644
--- a/paibox/utils.py
+++ b/paibox/utils.py
@@ -155,33 +155,33 @@ def reverse_16bit(x: int) -> int:
     return ((x >> 8) | (x << 8)) & 0xFFFF
 
 
+def _get_desc(desc: Optional[str] = None) -> str:
+    return "value" if desc is None else desc
+
+
 def arg_check_pos(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg < 1:
-        raise ValueError(f"{_desc} must be positive, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be positive, but got {arg}.")
 
     return arg
 
 
 def arg_check_non_pos(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg > 0:
-        raise ValueError(f"{_desc} must be non-positive, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be non-positive, but got {arg}.")
 
     return arg
 
 
 def arg_check_neg(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg > -1:
-        raise ValueError(f"{_desc} must be negative, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be negative, but got {arg}.")
 
     return arg
 
 
 def arg_check_non_neg(arg: int, desc: Optional[str] = None) -> int:
-    _desc = "value" if desc is None else f"{desc}"
     if arg < 0:
-        raise ValueError(f"{_desc} must be non-negative, but got {arg}.")
+        raise ValueError(f"{_get_desc(desc)} must be non-negative, but got {arg}.")
 
     return arg

From c550954a1106e33dba92719070e1862ac6708d00 Mon Sep 17 00:00:00 2001
From: KafCoppelia <69038090+KafCoppelia@users.noreply.github.com>
Date: Mon, 2 Dec 2024 19:30:48 +0800
Subject: [PATCH 167/187] Revert "Feat(graphs): label the data flow format in
 each neuron of the network"

---
 .github/workflows/codecov.yml       |   2 +-
 .pre-commit-config.yaml             |   1 +
 paibox/backend/graphs.py            |  86 +++++++++---------
 paibox/backend/mapper.py            |  46 ++++------
 paibox/backend/placement.py         |   8 +-
 paibox/backend/routing.py           |   9 +-
 paibox/backend/types.py             |  13 ++-
 paibox/base.py                      |  83 ------------------
 paibox/components/_modules.py       |  52 +++++++----
 paibox/components/functional.py     | 131 +++++++++++-----------------
 paibox/components/modules.py        |   4 +-
 paibox/components/neuron/base.py    |  59 ++-----------
 paibox/components/neuron/neurons.py |  25 +++---
 paibox/components/neuron/utils.py   |  24 +----
 paibox/exceptions.py                |  14 +--
 paibox/network.py                   |  14 +--
 paibox/utils.py                     |  16 ++--
 pyproject.toml                      |   5 +-
 tests/components/test_functional.py |  42 ++-------
 tests/test_base.py                  |  19 +---
 20 files changed, 209 insertions(+), 444 deletions(-)

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index 35d36c57..05f9ce7b 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -22,7 +22,7 @@ jobs:
   pytest:
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         os: [ubuntu-latest, windows-latest]
     runs-on: ${{ matrix.os }}
 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b5b4b101..490b6d15 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,6 +42,7 @@ repos:
       - id: check-symlinks
       - id: check-merge-conflict
       - id: mixed-line-ending
+      - id: name-tests-test
         args: [--pytest-test-first]
       - id: requirements-txt-fixer
       - id: pretty-format-json
diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 69a117e7..c3e2751d 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -2,18 +2,14 @@
 from collections import defaultdict
 from collections.abc import Iterable, Mapping, Sequence
 from dataclasses import dataclass, field
-from typing import Any, TypeVar, Union, cast
+from typing import Any, TypeVar, Union
 
 from paicorelib import HwConfig
 
 from paibox.collector import Collector
 from paibox.components import FullConnectedSyn, InputProj, NeuModule, Neuron
 from paibox.components.functional import LinearSemiFolded
-from paibox.exceptions import (
-    GraphBuildError,
-    GraphConnectionError,
-    GraphNotSupportedError,
-)
+from paibox.exceptions import GraphBuildError, GraphConnectionError, NotSupportedError
 from paibox.network import DynSysGroup
 from paibox.utils import check_elem_unique
 
@@ -131,12 +127,12 @@ def _pre_build(self, **build_options) -> None:
                 # checks. These additional checks may be removed as more network structures will be supported.
 
                 # Currently, `LinearSemiFolded` is at the end of the network, since it will change the form of
-                # the input dataflow, and its effective output is at the same time.
+                # the input data stream, and its effective output is at the same time.
                 semi_linears = modules.subset(LinearSemiFolded)
                 if not all(
                     len(succ_dg_semi_ops[linear]) == 0 for linear in semi_linears
                 ):
-                    raise GraphNotSupportedError(
+                    raise NotSupportedError(
                         "currently, the semi-folded linear can only be used as output of the network."
                     )
 
@@ -176,18 +172,15 @@ def _update_graph(self, **build_options) -> None:
         self.inodes = self._raw_nodes.subset(InputProj)
 
         # By default, nodes with out-degree = 0 are considered as output nodes.
-        # TODO A node with out-degree can also be an output node. However, no network for now has this topology.
-        self.onodes = Collector(
-            {
-                k: cast(DestNodeType, v)
-                for k, v in self._raw_nodes.items()
-                if self.degree_of_nodes[k].out_degree == 0
-            }
-        ).not_subset(InputProj)
+        self.onodes = self._raw_nodes.key_on_condition(
+            lambda node: self.degree_of_nodes[node].out_degree == 0
+        )  # type: ignore
 
         for name, node in self._raw_nodes.items():
             self.nodes[name] = NodeAttr(
-                node, self._node_pos(name), self.degree_of_nodes[name]
+                node=node,
+                position=self._node_pos(name),
+                degree=self.degree_of_nodes[name],
             )
 
         self.ordered_nodes = toposort(self.succ_dg)
@@ -222,7 +215,7 @@ def topo_support_check(self) -> None:
             onode.num_out > HwConfig.N_FANIN_PER_DENDRITE_MAX
             for onode in self.onodes.values()
         ):
-            raise GraphNotSupportedError(
+            raise NotSupportedError(
                 f"only output nodes with no more than {HwConfig.N_FANIN_PER_DENDRITE_MAX} "
                 f"neurons are supported."
             )
@@ -532,10 +525,9 @@ def _find_rg_by_cb(
     @property
     def inherent_timestep(self) -> int:
         self.build_check()
-        return max(
-            n.oflow_format.get_global_t_1st_vld(n.tick_wait_start)
-            for n in self.onodes.values()
-        )
+        _, distance = get_longest_path(self.succ_dg, self.ordered_nodes)
+
+        return distance
 
     @property
     def graph_name_repr(self) -> str:
@@ -543,7 +535,7 @@ def graph_name_repr(self) -> str:
         return _prefix + "_and_".join(network.name for network in self._raw_networks)
 
 
-_NT = TypeVar("_NT", CoreBlock, NodeName, RoutingGroup, MergedSuccGroup)
+_NT = TypeVar("_NT", CoreBlock, NodeName, RoutingGroup)
 _T = TypeVar("_T")
 
 
@@ -559,7 +551,7 @@ def _degree_check(
                     if isinstance(succ_node, CoreBlock)
                     else str(succ_node)
                 )
-                raise GraphNotSupportedError(
+                raise NotSupportedError(
                     f"If out-degree of a node is greater than 1, the in-degree of its sucessors must be 1. "
                     f"However, in-degree of {_node_repr} is {degree_of_nodes[succ_node].in_degree}."
                 )
@@ -572,7 +564,7 @@ def find_cycles(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[list[_NT]]:
     stack_set: set[_NT] = set()  # 方便快速检查路径中的节点
 
     # 深度优先搜索的辅助函数
-    def dfs(node: _NT) -> None:
+    def dfs(node: _NT):
         if node in stack_set:  # 检测到环
             cycle_start_index = stack.index(node)
             cycles.append(stack[cycle_start_index:])
@@ -598,45 +590,45 @@ def dfs(node: _NT) -> None:
     return cycles
 
 
-def merge_overlap(groups: Iterable[Sequence[_NT]]) -> list[list[_NT]]:
+def merge_overlap(groups: Iterable[Iterable[_NT]]) -> list[list[_NT]]:
     # 并查集数据结构
     parent: dict[_NT, _NT] = dict()
 
     # 查找集合的根节点
-    def find(x: _NT) -> _NT:
+    def find(x):
         if parent[x] != x:
             parent[x] = find(parent[x])
-
         return parent[x]
 
     # 合并两个集合
-    def union(x, y) -> None:
-        rootx = find(x)
-        rooty = find(y)
-        if rootx != rooty:
-            parent[rooty] = rootx
+    def union(x, y):
+        rootX = find(x)
+        rootY = find(y)
+        if rootX != rootY:
+            parent[rootY] = rootX
 
     # 初始化并查集
     for group in groups:
-        for elem in group:
-            if elem not in parent:
-                parent[elem] = elem
+        for element in group:
+            if element not in parent:
+                parent[element] = element
 
     # 合并所有相互重叠的环
     for group in groups:
-        first_elem = group[0]
-        for elem in group[1:]:
-            union(first_elem, elem)
+        first_element = group[0]
+        for element in group[1:]:
+            union(first_element, element)
 
     # 根据并查集结果，将所有节点归类到同一个集合中
-    mgrps: dict[_NT, list[_NT]] = dict()
-    for elem in parent:
-        root = find(elem)
-        if root not in mgrps:
-            mgrps[root] = []
-        mgrps[root].append(elem)
+    merged_groups: dict[_NT, list[_NT]] = dict()
+    for element in parent:
+        root = find(element)
+        if root not in merged_groups:
+            merged_groups[root] = []
+        merged_groups[root].append(element)
 
-    return list(mgrps.values())
+    # 将结果转换为列表列表形式
+    return list(merged_groups.values())
 
 
 def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
@@ -693,7 +685,7 @@ def toposort(directed_edges: Mapping[_NT, Iterable[_NT]]) -> list[_NT]:
                 vertices.add(m)
 
     if any(incoming_edges.get(v, None) for v in directed_edges):
-        raise GraphNotSupportedError("the graph with cycles is not supported.")
+        raise NotSupportedError("the graph with cycles is not supported.")
 
     return ordered
 
diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index e0bfb715..6012d7fc 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -8,7 +8,7 @@
 
 from paibox.base import SynSys
 from paibox.components import Neuron
-from paibox.exceptions import CompileError, ConfigInvalidError, ResourceError
+from paibox.exceptions import ConfigInvalidError, ResourceError
 from paibox.network import DynSysGroup
 
 from .conf_exporting import *
@@ -45,11 +45,10 @@
 
 
 class Mapper:
-    graph: PAIGraph
+    graph = PAIGraph()
     graph_info: GraphInfo
 
     def __init__(self) -> None:
-        self.graph = PAIGraph()
         self.core_blocks: list[CoreBlock] = []
         """List for core blocks in the network."""
         self.succ_core_blocks: dict[CoreBlock, list[CoreBlock]] = defaultdict(list)
@@ -72,9 +71,6 @@ def __init__(self) -> None:
             chip_list=_BACKEND_CONTEXT["target_chip_addr"]
         )
 
-        self._core_estimate_only = False
-        """Wether this compilation is for core estimation only. If so, no core will be assigned."""
-
         self.clear()
 
     def clear(self) -> None:
@@ -94,8 +90,6 @@ def clear(self) -> None:
         self.n_core_required = 0
         self.n_core_occupied = 0
 
-        self._core_estimate_only = False
-
         # Set default cflags
         _BACKEND_CONTEXT.cflags.clear()
         set_cflag(enable_wp_opt=True)
@@ -175,8 +169,6 @@ def compile(
             set_cflag(multicast_optim=True)
             set_cflag(multicast_optim_nodes=_mul_optim_nodes)
 
-        self._core_estimate_only = core_estimate_only
-
         """Preperation.
             1. Check whether the PAIGraph has built.
             2. Set global compilation flags.
@@ -200,9 +192,9 @@ def compile(
         self.cb_axon_grouping()
 
         """Core coordinate assignment."""
-        self.coord_assign(self._core_estimate_only)
+        self.coord_assign(core_estimate_only)
 
-        if self._core_estimate_only:
+        if core_estimate_only:
             return GraphInfo(
                 name=self.graph.graph_name_repr,
                 input={},
@@ -382,7 +374,7 @@ def config_export(self) -> GraphInfo:
         ]:
             raise ConfigInvalidError(
                 f"the output chip address {ochip_coord} should not overlap with the "
-                f"target chip addresses, but got {_BACKEND_CONTEXT._target_chip_addr_repr()}."
+                f"chip addresses, but got {_BACKEND_CONTEXT._target_chip_addr_repr()}."
             )
 
         input_nodes_info = self._inpproj_config_export()
@@ -627,12 +619,6 @@ def export(
 
         Return: total configurations in dictionary format.
         """
-        if self._core_estimate_only:
-            raise CompileError(
-                "the current compilation is only for core estimation. "
-                "Please disable 'core_estimate_only' and compile again before exporting."
-            )
-
         if format not in ("bin", "npy", "txt"):
             raise ValueError(f"format {format} is not supported.")
 
@@ -705,27 +691,27 @@ def _find_dest_cb_by_nseg(
         return dest_cb_of_nseg
 
 
-def cycle_merge(merged_sgrps: list[MergedSuccGroup]) -> list[MergedSuccGroup]:
-    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = defaultdict(list)
-
+def cycle_merge(merged_sgrps: list[MergedSuccGroup]):
+    succ_merged_sgrps: dict[MergedSuccGroup, list[MergedSuccGroup]] = dict()
     for msgrp in merged_sgrps:
+        succ_merged_sgrps[msgrp] = []
+        nodes = set(msgrp.nodes)
         for _msgrp in merged_sgrps:
             if msgrp == _msgrp:
                 continue
-            if not msgrp.nodes.isdisjoint(_msgrp.input_nodes):
+            if not nodes.isdisjoint(_msgrp.input_nodes):
                 succ_merged_sgrps[msgrp].append(_msgrp)
 
     cycles: list[list[MergedSuccGroup]] = find_cycles(succ_merged_sgrps)
     merged_cycles: list[list[MergedSuccGroup]] = merge_overlap(cycles)
 
     processed_merged_cycles: list[MergedSuccGroup] = list()
-    remaining_msgrps: set[MergedSuccGroup] = set(merged_sgrps)
-    for mc in merged_cycles:
-        processed_merged_cycles.append(MergedSuccGroup.merge(mc))
-        for msgrp in mc:
-            remaining_msgrps.remove(msgrp)
-
-    processed_merged_cycles.extend(remaining_msgrps)
+    remaining_merged_sgrps: set[MergedSuccGroup] = set(merged_sgrps)
+    for merged_cycle in merged_cycles:
+        processed_merged_cycles.append(MergedSuccGroup.merge(merged_cycle))
+        for msgrp in merged_cycle:
+            remaining_merged_sgrps.remove(msgrp)
+    processed_merged_cycles.extend(remaining_merged_sgrps)
     return processed_merged_cycles
 
 
diff --git a/paibox/backend/placement.py b/paibox/backend/placement.py
index 3fc370b9..e77e0035 100644
--- a/paibox/backend/placement.py
+++ b/paibox/backend/placement.py
@@ -1,6 +1,6 @@
 import math
 import warnings
-from typing import ClassVar, Literal, Optional, cast, overload
+from typing import ClassVar, Literal, Optional, overload
 
 import numpy as np
 from paicorelib import LCN_EX, ChipCoord, Coord, CoreMode, HwConfig, MaxPoolingEnable
@@ -177,9 +177,7 @@ def shape(self) -> tuple[int, int]:
     @property
     def source(self) -> list[SourceNodeType]:
         """Ordered unique source nodes."""
-        return cast(
-            list[SourceNodeType], list(set([parent.source for parent in self.obj]))
-        )
+        return list(set([parent.source for parent in self.obj]))
 
     @property
     def axons(self) -> list[SourceNodeType]:
@@ -188,7 +186,7 @@ def axons(self) -> list[SourceNodeType]:
     @property
     def dest(self) -> list[DestNodeType]:
         """Ordered unique destination nodes."""
-        return cast(list[DestNodeType], list(set([parent.dest for parent in self.obj])))
+        return list(set([parent.dest for parent in self.obj]))
 
     def n_axon_of(self, index: int) -> int:
         """Get the #N of axons of `index`-th source neuron."""
diff --git a/paibox/backend/routing.py b/paibox/backend/routing.py
index 2e854d6d..9154d185 100644
--- a/paibox/backend/routing.py
+++ b/paibox/backend/routing.py
@@ -11,14 +11,19 @@
 from paicorelib import RoutingLevel as Level
 from paicorelib.routing_defs import MAX_ROUTING_PATH_LENGTH
 
-from paibox.exceptions import PAIBoxDeprecationWarning, ResourceError, RoutingError
+from paibox.exceptions import (
+    GraphBuildError,
+    PAIBoxDeprecationWarning,
+    ResourceError,
+    RoutingError,
+)
 
 from .conf_types import CorePlmConfInChip
 from .placement import CoreBlock, EmptyCorePlacement
 from .types import *
 
 if sys.version_info >= (3, 13):
-    from warnings import deprecated
+    from typing import deprecated
 else:
     from typing_extensions import deprecated
 
diff --git a/paibox/backend/types.py b/paibox/backend/types.py
index b8f34f3b..778e3853 100644
--- a/paibox/backend/types.py
+++ b/paibox/backend/types.py
@@ -30,6 +30,7 @@
     "NodeDegree",
     "NodeAttr",
     "EdgeAttr",
+    "PartitionedEdges",
     "NeuSlice",
     "NeuSegment",
     "NeuSegOfCorePlm",
@@ -87,19 +88,23 @@ def copy(self) -> "NodeDegree":
         return self.__deepcopy__()
 
 
-@dataclass
-class NodeAttr:
+class NodeAttr(NamedTuple):
     node: NodeType
     position: NodePosition
     degree: NodeDegree
 
 
-@dataclass
-class EdgeAttr:  # TODO FIXME distance?
+class EdgeAttr(NamedTuple):
     edge: EdgeType
     distance: int
 
 
+class PartitionedEdges(NamedTuple):
+    edges: set[EdgeType]
+    rg_id: int
+    rt_mode: CoreMode = CoreMode.MODE_SNN  # XXX Temp solution
+
+
 NeuSlice: TypeAlias = slice
 
 
diff --git a/paibox/base.py b/paibox/base.py
index fbfe3959..31e25387 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -1,5 +1,4 @@
 import sys
-from dataclasses import dataclass
 from typing import Any, ClassVar, Literal, Optional
 
 import numpy as np
@@ -257,77 +256,6 @@ def state(self) -> NodeDict:
         return self._memories
 
 
-INFINITE_DATAFLOW = 0
-
-
-@dataclass
-class DataFlowFormat:
-    """Describe in detail the format of valid data in the dataflow."""
-
-    t_1st_vld: int = 0
-    """Global time or a relative time of the first valid data in the dataflow, determined by `is_local_time`."""
-    interval: int = 1
-    """The interval of valid data in the dataflow."""
-    n_vld: int = INFINITE_DATAFLOW
-    """The number of valid data. <0 for infinite dataflow."""
-
-    is_local_time: bool = True
-    """Whether the `t_1st_vld` is relative to the local time(tws+T) of the neuron, or   \
-        relative to the global time of the external input."""
-
-    def t_at_idx(self, idx: int) -> int:
-        """The time of the valid data at the given index."""
-        if self.n_vld > INFINITE_DATAFLOW:
-            assert 0 <= idx <= self.n_vld - 1
-
-        return self.t_1st_vld + idx * self.interval
-
-    def t_at_n(self, n: int) -> int:
-        """The time of the n-th valid data."""
-        return self.t_at_idx(n - 1)
-
-    @property
-    def t_last_vld(self) -> int:
-        """The time of the last valid data."""
-        assert self.n_vld > INFINITE_DATAFLOW
-        return self.t_at_n(self.n_vld)
-
-    def get_global_t_1st_vld(self, tws: int) -> int:
-        """Get the global time of the first valid data."""
-        return tws + self.t_1st_vld if self.is_local_time else self.t_1st_vld
-
-    def _check_after_assign(self, tws: int, end_tick: int) -> None:
-        _t_1st_vld_out_of_range_text = (
-            "the {0} output time of the first valid data should be in the working "
-            + "time from {1} to {2}, but got {3}."
-        )
-
-        # The global time of the first valid data is in [tws, end_tick].
-        gb_t_1st_vld = self.get_global_t_1st_vld(tws)
-        if gb_t_1st_vld < tws or gb_t_1st_vld > end_tick:
-            if self.is_local_time:
-                raise ValueError(
-                    _t_1st_vld_out_of_range_text.format(
-                        "local", "+0", f"+{end_tick - tws + 1}", self.t_1st_vld
-                    )
-                )
-            else:
-                raise ValueError(
-                    _t_1st_vld_out_of_range_text.format(
-                        "global", tws, end_tick, self.t_1st_vld
-                    )
-                )
-
-        if self.n_vld > INFINITE_DATAFLOW:
-            if (
-                t_last_vld := gb_t_1st_vld + (self.n_vld - 1) * self.interval
-            ) > end_tick:
-                raise ValueError(
-                    f"valid data is output after the end time. The neuron stops working at "
-                    f"{end_tick}, but still needs to output at {t_last_vld}."
-                )
-
-
 class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
 
     _delay: int
@@ -338,9 +266,6 @@ class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
     _uf: int
     """unrolling_factor"""
 
-    oflow_format: DataFlowFormat
-    """The format of output data stream"""
-
     def __init__(self, name: Optional[str] = None) -> None:
         super().__init__(name)
         self.master_nodes = NodeDict()
@@ -366,14 +291,6 @@ def tick_wait_end(self) -> int:
     def unrolling_factor(self) -> int:
         return self._uf
 
-    @property
-    def end_tick(self) -> int:
-        """End time of work."""
-        if self.tick_wait_end == 0:
-            return 9999  # Never end
-
-        return self.tick_wait_start + self.tick_wait_end - 1
-
     @unrolling_factor.setter
     def unrolling_factor(self, factor: int) -> None:
         self._uf = arg_check_pos(factor, "'unrolling_factor'")
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index a2a0953d..990b5a77 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,10 +1,12 @@
+import math
 import typing
+from dataclasses import dataclass
 from typing import Literal, Optional, Union
 
 import numpy as np
 from paicorelib import TM, HwConfig
 
-from paibox.base import DataFlowFormat, NeuDyn, NodeList
+from paibox.base import NeuDyn, NodeList
 from paibox.exceptions import ResourceError, ShapeError
 from paibox.types import (
     LEAK_V_DTYPE,
@@ -56,7 +58,7 @@
     "_SpikingPool2dWithV",
     "_SemiFoldedModule",
     "_LinearBase",
-    "SemiFoldedDataFlowFormat",
+    "SemiFoldedStreamAttr",
 ]
 
 
@@ -159,41 +161,61 @@ class _DelayChainANN(_DelayChainBase):
     pass
 
 
-class SemiFoldedDataFlowFormat(DataFlowFormat):
-    pass
+@dataclass(frozen=True)
+class SemiFoldedStreamAttr:
+    """Details of transmission of valid data in semi-folded form data stream."""
+
+    t_1st_vld: int
+    """The time of the first valid data, relative to `t_1st_vld` of the external input."""
+    interval: int
+    """The interval of the output data stream."""
+    n_data: int = 0
+    """The number of valid output data."""
+
+    def t_at(self, n: int) -> int:
+        """The time of the n-th valid data."""
+        if self.n_data > 0:
+            assert 1 <= n <= self.n_data
+
+        return self.t_1st_vld + (n - 1) * self.interval
+
+    @property
+    def t_last_vld(self) -> int:
+        """The time of the last valid data."""
+        assert self.n_data > 0
+        return self.t_at(self.n_data)
 
 
 @set_rt_mode_ann()
 class _SemiFoldedModule(FunctionalModule):
     """Functional modules with interfaces in semi-folded form. Use `build()` of class `HasSemiFoldedIntf`."""
 
-    inherent_delay = 1
-    oflow_format: SemiFoldedDataFlowFormat
+    ostream_attr: SemiFoldedStreamAttr
 
     def build(
         self,
         network: "DynSysGroup",
-        incoming_flow_format: SemiFoldedDataFlowFormat,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
         raise NotImplementedError
 
     def _input_buffer_len_check(
-        self, ich: int, ih: int, kw: int, interval: int
+        self, in_channels: int, in_h: int, kw: int, valid_interval: int
     ) -> None:
         """Check the limit of the semi-folded operators on the input buffer length of the core during the build phase.
 
-        NOTE: The right side of the inequality will only be smaller in the backend. If the condition is not met, an \
-            expection will be raised in the subsequent compilation phase.
+        NOTE: If the condition is not met, an exception will be raised in the subsequent compilation phase.
         """
         E = math.ceil(
-            math.log2(math.ceil(ich * ih * kw / HwConfig.N_FANIN_PER_DENDRITE_ANN))
+            math.log2(
+                math.ceil(in_channels * in_h * kw / HwConfig.N_FANIN_PER_DENDRITE_ANN)
+            )
         )
-
-        if min(ih - kw, kw - 1) * interval + 1 >= (HwConfig.N_TIMESLOT_MAX >> E):
-            _adjust_text = "input size, kernel size or stride along the data flow."
+        deep = min(in_h - kw, kw - 1) * valid_interval + 1
+        if not HwConfig.N_TIMESLOT_MAX / (2**E) > deep:
             raise ResourceError(
-                f"the data arrangement of {self.name}'s input buffer may be wrong. Please adjust the {_adjust_text}."
+                f"the input size of {self.name} is too large. Please adjust the input size or the number of channels."
             )
 
 
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 6755cc5e..37aa6fc9 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -905,26 +905,22 @@ class LinearSemiFolded(_LinearBase, _SemiFoldedModule):
     def build(
         self,
         network: "DynSysGroup",
-        incoming_flow_format: SemiFoldedDataFlowFormat,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
-        # For semi-folded linear, the valid output is at only one timestep.
-        self.oflow_format = SemiFoldedDataFlowFormat(
-            incoming_flow_format.t_last_vld, 1, 1
-        )
-        twe = 1 + self.oflow_format.t_last_vld
+        self.ostream_attr = incoming_stream_attr
+        twe = 1 + self.ostream_attr.t_last_vld
 
         ich, ih = self.source[0].shape_out
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(ich, ih, ih, incoming_flow_format.interval)
-
+            self._input_buffer_len_check(ich, ih, ih, incoming_stream_attr.interval)
         n_delays = NodeList()
         s_delays = NodeList()
         s_weight = NodeList()
 
-        n_linear = ANNNeuron(
+        n_fc = ANNNeuron(
             self.shape_out,
             self.bias,
             self.bit_trunc,
@@ -934,18 +930,13 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
-        n_linear.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
-        )
 
         for i in range(ih):
             neuron = ANNBypassNeuron(
                 shape=(ich, ih),
-                delay=incoming_flow_format.interval * i + 1,
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -963,15 +954,15 @@ def build(
             w = self.weights[ih - i - 1 :: ih, :]
             syn2 = FullConnSyn(
                 neuron,
-                n_linear,
+                n_fc,
                 weights=w,
                 conn_type=ConnType.All2All,
                 name=f"s{i}_{self.name}",
             )
             s_weight.append(syn2)
 
-        generated = [n_linear, *n_delays, *s_delays, *s_weight]
-        self._rebuild_out_intf(network, n_linear, *generated, **build_options)
+        generated = [n_fc, *n_delays, *s_delays, *s_weight]
+        self._rebuild_out_intf(network, n_fc, *generated, **build_options)
 
         return generated
 
@@ -1017,11 +1008,9 @@ def __init__(
         # XXX Do not consider the case when the shape of source neurons needs to be changed, for now.
         # neuron_s.shape_change((in_ch, in_h))
 
-        cout, cin, kh, kw = kernel.shape
+        cout, cin, kh, _ = kernel.shape
         out_h = (in_h - kh + 2 * self.padding[0]) // self.stride[0] + 1
 
-        assert self.padding[0] < kh and self.padding[1] < kw
-
         if in_ch != cin:
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
@@ -1045,7 +1034,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_flow_format: SemiFoldedDataFlowFormat,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1058,15 +1047,14 @@ def build(
         _, cin, _, kw = self.kernel.shape
         _, ow = self.shape_out
 
-        self.oflow_format = SemiFoldedDataFlowFormat(
-            incoming_flow_format.t_at_n(kw - self.padding[0]),
-            incoming_flow_format.interval * self.stride[1],
+        self.ostream_attr = SemiFoldedStreamAttr(
+            incoming_stream_attr.t_at(kw - self.padding[0]),
+            incoming_stream_attr.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.oflow_format.t_last_vld
-
+        twe = 1 + self.ostream_attr.t_last_vld
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
         n_delays = NodeList()
         n_neg_padding = NodeList()
@@ -1084,18 +1072,12 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
-        n_conv2d.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
-        )
-
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_flow_format.interval * i + 1,
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
@@ -1123,13 +1105,13 @@ def build(
         # Add additional negative padding layer to eliminate the incorrect output
         # NOTE: `t_1st_vld` = 0 & `padding[0]` > 0 means the previous layer is
         # an input node. No need to add negative padding layer for this case.
-        if incoming_flow_format.t_1st_vld > 0:
+        if incoming_stream_attr.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, ih),
-                    delay=1 + incoming_flow_format.interval * (kw - 1 - p),
+                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=incoming_flow_format.t_1st_vld,
+                    tick_wait_end=incoming_stream_attr.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1214,7 +1196,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_flow_format: SemiFoldedDataFlowFormat,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1227,20 +1209,20 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.oflow_format = SemiFoldedDataFlowFormat(
-            incoming_flow_format.t_at_n(kw),
-            incoming_flow_format.interval * self.stride[1],
+        self.ostream_attr = SemiFoldedStreamAttr(
+            incoming_stream_attr.t_at(kw),
+            incoming_stream_attr.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.oflow_format.t_last_vld
+        twe = 1 + self.ostream_attr.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
         n_delays = NodeList()
         s_delays = NodeList()
 
-        n_pool2d = ANNNeuron(
+        pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
@@ -1249,18 +1231,13 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
-        n_pool2d.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
-        )
 
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_flow_format.interval * i + 1,
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1276,7 +1253,7 @@ def build(
             s_delays.append(syn1)
             syn2 = MaxPoolSyn(
                 neuron,
-                n_pool2d,
+                pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
                     cin, ih, ow, kh, self.stride, (0, 0)
                 ),
@@ -1284,8 +1261,8 @@ def build(
             )
             s_delays.append(syn2)
 
-        generated = [n_pool2d, *n_delays, *s_delays]
-        self._rebuild_out_intf(network, n_pool2d, *generated, **build_options)
+        generated = [pool2d, *n_delays, *s_delays]
+        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
 
         return generated
 
@@ -1325,8 +1302,6 @@ def __init__(
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
         out_h = (in_h - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
-        kh, kw = self.kernel_size
-        assert self.padding[0] < kh and self.padding[1] < kw
 
         super().__init__(
             neuron_s,
@@ -1339,7 +1314,7 @@ def __init__(
     def build(
         self,
         network: "DynSysGroup",
-        incoming_flow_format: SemiFoldedDataFlowFormat,
+        incoming_stream_attr: SemiFoldedStreamAttr,
         **build_options,
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
@@ -1352,15 +1327,15 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.oflow_format = SemiFoldedDataFlowFormat(
-            incoming_flow_format.t_at_n(kw - self.padding[0]),
-            incoming_flow_format.interval * self.stride[1],
+        self.ostream_attr = SemiFoldedStreamAttr(
+            incoming_stream_attr.t_at(kw - self.padding[0]),
+            incoming_stream_attr.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.oflow_format.t_last_vld
+        twe = 1 + self.ostream_attr.t_last_vld
 
         if build_options.get("check_before_compile"):
-            self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
+            self._input_buffer_len_check(cin, ih, kw, incoming_stream_attr.interval)
 
         # NOTE: Division is achieved with the help of output truncation.
         # TODO Since division with a divisor that is an integer power of 2 can only be implemented by
@@ -1380,7 +1355,7 @@ def build(
         s_delays = NodeList()
         s_neg_padding = NodeList()
 
-        n_pool2d = ANNNeuron(
+        pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
             bit_trunc=bit_trunc,
@@ -1389,18 +1364,12 @@ def build(
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
-        n_pool2d.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
-        )
-
         for i in range(kw):
             neuron = ANNBypassNeuron(
                 (cin, ih),
-                delay=incoming_flow_format.interval * i + 1,
+                delay=incoming_stream_attr.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe - incoming_stream_attr.interval * i,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1416,7 +1385,7 @@ def build(
             s_delays.append(syn1)
             syn2 = FullConnSyn(
                 neuron,
-                n_pool2d,
+                pool2d,
                 weights=_poo2d_semifolded_mapping_mask(
                     cin, ih, ow, kh, self.stride, self.padding
                 ),
@@ -1426,13 +1395,13 @@ def build(
             s_delays.append(syn2)
 
         # Add additional negative padding layer to eliminate the incorrect output
-        if incoming_flow_format.t_1st_vld > 0:
+        if incoming_stream_attr.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
                     (cin, ih),
-                    delay=1 + incoming_flow_format.interval * (kw - 1 - p),
+                    delay=1 + incoming_stream_attr.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
-                    tick_wait_end=incoming_flow_format.t_1st_vld,
+                    tick_wait_end=incoming_stream_attr.t_1st_vld,
                     keep_shape=self.keep_shape,
                     name=f"n{p}_pad_{self.name}",
                 )
@@ -1449,7 +1418,7 @@ def build(
 
                 syn2 = FullConnSyn(
                     neuron,
-                    n_pool2d,
+                    pool2d,
                     weights=-_poo2d_semifolded_mapping_mask(
                         cin, ih, ow, kh, self.stride, self.padding
                     ),
@@ -1458,8 +1427,8 @@ def build(
                 )
                 s_neg_padding.append(syn2)
 
-        generated = [n_pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
-        self._rebuild_out_intf(network, n_pool2d, *generated, **build_options)
+        generated = [pool2d, *n_delays, *n_neg_padding, *s_delays, *s_neg_padding]
+        self._rebuild_out_intf(network, pool2d, *generated, **build_options)
 
         return generated
 
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 703c1a00..41e06fdc 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -14,7 +14,7 @@
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import RTModeKwds, _input_width_format, _spike_width_format
+from .neuron.utils import _input_width_format, _RTModeKwds, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
@@ -91,7 +91,7 @@ class NeuModule(NeuDyn, BuildingModule):
     """#N of outputs."""
     inherent_delay: int = 0
     """Internal delay of the module, relative to the external."""
-    rt_mode_kwds: RTModeKwds
+    rt_mode_kwds: _RTModeKwds
     mode: CoreMode
 
     def __init__(
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 635ac3f6..a35fa8b5 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -20,8 +20,8 @@
     get_core_mode,
 )
 
-from paibox.base import DataFlowFormat, NeuDyn
-from paibox.exceptions import ConfigInvalidError, PAIBoxWarning, ShapeError
+from paibox.base import NeuDyn
+from paibox.exceptions import NotSupportedError, PAIBoxWarning, ShapeError
 from paibox.types import (
     NEUOUT_U8_DTYPE,
     VOLTAGE_DTYPE,
@@ -41,10 +41,10 @@
 from .utils import (
     BIT_TRUNCATE_MAX,
     NEG_THRES_MIN,
-    RTModeKwds,
     _input_width_format,
     _leak_v_check,
     _mask,
+    _RTModeKwds,
     _spike_width_format,
     vjt_overflow,
 )
@@ -57,7 +57,7 @@
 class MetaNeuron:
     """Meta neuron"""
 
-    rt_mode_kwds: RTModeKwds
+    rt_mode_kwds: _RTModeKwds
     mode: CoreMode
 
     def __init__(
@@ -96,8 +96,8 @@ def __init__(
         # check whether the mode is valid
         self.mode = get_core_mode(input_width, spike_width, snn_en)
 
-        if pool_max and self.mode != CoreMode.MODE_ANN:
-            raise ConfigInvalidError(
+        if pool_max == True and self.mode != CoreMode.MODE_ANN:
+            raise NotSupportedError(
                 f"max pooling is only supported in {CoreMode.MODE_ANN.name}, "
                 f"but got {self.mode.name}."
             )
@@ -487,13 +487,11 @@ def __init__(
             ),
         )
 
-        """Non-stateful attributes."""
+        """Auxiliary internal stateful attributes for debugging"""
         self._delay = arg_check_pos(delay, "'delay'")
         self._tws = arg_check_non_neg(tick_wait_start, "'tick_wait_start'")
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
         self._uf = arg_check_pos(unrolling_factor, "'unrolling_factor'")
-        # Default dataflow is infinite and continuous, starting at tws+0.
-        self.oflow_format = DataFlowFormat(0, is_local_time=True)
 
     def __len__(self) -> int:
         return self._n_neuron
@@ -531,49 +529,6 @@ def update(
     def reset_state(self, *args, **kwargs) -> None:
         self.reset_memory()  # Call reset of `StatusMemory`.
 
-    def set_oflow_format(
-        self,
-        t_1st_vld: Optional[int] = None,
-        interval: Optional[int] = None,
-        n_vld: Optional[int] = None,
-        *,
-        format_type: type[DataFlowFormat] = DataFlowFormat,
-    ) -> None:
-        """Set the attributes of output dataflow format by given arguments."""
-        if hasattr(self, "oflow_format"):
-            _t_1st_vld = (
-                t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
-            )
-            _interval = (
-                arg_check_pos(interval, "interval")
-                if isinstance(interval, int)
-                else self.oflow_format.interval
-            )
-            _n_vld = (
-                arg_check_non_neg(n_vld, "n_vld")
-                if isinstance(n_vld, int)
-                else self.oflow_format.n_vld
-            )
-            self._assign_flow_format(_t_1st_vld, _interval, _n_vld)
-        else:
-            if not (
-                isinstance(interval, int)
-                and isinstance(n_vld, int)
-                and isinstance(t_1st_vld, int)
-            ):
-                raise ValueError(
-                    "if 'oflow_format' is not set, 't_1st_vld', 'interval' & 'n_vld' must be set."
-                )
-
-            self.oflow_format = format_type(t_1st_vld, interval, n_vld)
-            self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
-
-    def _assign_flow_format(self, t_1st_vld: int, intv: int, n_vld: int) -> None:
-        self.oflow_format.t_1st_vld = t_1st_vld
-        self.oflow_format.interval = intv
-        self.oflow_format.n_vld = n_vld
-        self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
-
     def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
         return self.__deepcopy__()
diff --git a/paibox/components/neuron/neurons.py b/paibox/components/neuron/neurons.py
index 3f7f0279..85e3df47 100644
--- a/paibox/components/neuron/neurons.py
+++ b/paibox/components/neuron/neurons.py
@@ -8,15 +8,10 @@
 from paibox.types import LEAK_V_DTYPE, DataType, Shape
 
 from .base import Neuron
-from .utils import LEAK_V_MAX, ExtraNeuAttrKwds
-
-if sys.version_info >= (3, 12):
-    from typing import Unpack
-else:
-    from typing_extensions import Unpack
+from .utils import LEAK_V_MAX
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
 else:
     from typing_extensions import deprecated
 
@@ -42,7 +37,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """IF neuron.
 
@@ -98,7 +93,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """LIF neuron.
 
@@ -157,7 +152,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """Tonic spiking neuron.
 
@@ -183,7 +178,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """Phasic spiking neuron. Once the neuron receives `N` spikes and fires, it will reset to   \
             the negative floor and never fires again. `N` is `fire_step`.
@@ -218,7 +213,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """A neuron that always outputs 1 as long as it starts working.
 
@@ -250,7 +245,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """Bypass neuron. Output is equal to input.
 
@@ -284,7 +279,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         """General neuron used in ANN mode. Positive threshold = 1, negative threshold = 0."""
         kwargs["bit_truncation"] = bit_trunc
@@ -304,7 +299,7 @@ def __init__(
         *,
         keep_shape: bool = True,
         name: Optional[str] = None,
-        **kwargs: Unpack[ExtraNeuAttrKwds],
+        **kwargs,
     ) -> None:
         super().__init__(
             shape, bias=0, bit_trunc=8, keep_shape=keep_shape, name=name, **kwargs
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index 349ea050..1d9ea2ff 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -2,12 +2,7 @@
 from typing import Literal, TypedDict, Union
 
 import numpy as np
-from paicorelib import (
-    InputWidthFormat,
-    MaxPoolingEnable,
-    SNNModeEnable,
-    SpikeWidthFormat,
-)
+from paicorelib import InputWidthFormat, SNNModeEnable, SpikeWidthFormat
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
     BIT_TRUNCATE_MAX,
@@ -112,24 +107,9 @@ def _get_neu_out_dtype(
         return NEUOUT_U8_DTYPE
 
 
-class RTModeKwds(TypedDict):
+class _RTModeKwds(TypedDict):
     """A typed keywords for runtime mode. Only for checking if necessary."""
 
     input_width: InputWidthFormat
     spike_width: SpikeWidthFormat
     snn_en: SNNModeEnable
-
-
-class ExtraNeuAttrKwds(TypedDict, total=False):
-    """A typed keywords for extra neuron attributes."""
-
-    bit_truncation: int  # For ANNNeuron
-    delay: int
-    tick_wait_start: int
-    tick_wait_end: int
-    input_width: Union[L[1, 8], InputWidthFormat]
-    spike_width: Union[L[1, 8], SpikeWidthFormat]
-    snn_en: Union[bool, SNNModeEnable]
-    pool_max: Union[bool, MaxPoolingEnable]
-    unrolling_factor: int
-    overflow_strict: bool
diff --git a/paibox/exceptions.py b/paibox/exceptions.py
index e5204ad0..55514a43 100644
--- a/paibox/exceptions.py
+++ b/paibox/exceptions.py
@@ -57,13 +57,7 @@ class GraphConnectionError(GraphBuildError):
 
 
 class NotSupportedError(PAIBoxError, NotImplementedError):
-    """Exception for unsupported functions."""
-
-    pass
-
-
-class GraphNotSupportedError(GraphBuildError, NotSupportedError):
-    """Eception for unsupported structures of graph."""
+    """Exception for a certain function not supported."""
 
     pass
 
@@ -80,12 +74,6 @@ class FunctionalError(PAIBoxError, RuntimeError):
     pass
 
 
-class CompileError(PAIBoxError, RuntimeError):
-    """Exception for compilation."""
-
-    pass
-
-
 class RoutingError(PAIBoxError):
     """Exception for routing tree."""
 
diff --git a/paibox/network.py b/paibox/network.py
index 6db83afd..4f0a4c12 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -7,7 +7,7 @@
 from .base import DynamicSys, SynSys
 from .collector import Collector
 from .components import NeuModule, Neuron, Projection
-from .components._modules import SemiFoldedDataFlowFormat, _SemiFoldedModule
+from .components._modules import SemiFoldedStreamAttr, _SemiFoldedModule
 from .components.modules import BuiltComponentType
 from .exceptions import NotSupportedError
 from .mixin import Container
@@ -102,18 +102,18 @@ def build_modules(
 
         generated = dict()
 
-        # For external input dataflow:
-        # 1. The start time is 0.
-        # 2. The interval is 1.
-        # 3. The #N of data is `INFINITE_DATA_STREAM` since it dosen't effect the subsequent output dataflow.
+        # For external input stream info:
+        # 1. The start time is 1
+        # 2. The interval is 1
+        # 3. The #N of data is -1 since it doesn't affect the subsequent output stream.
         # TODO Reserve an interface for setting the properties of external input from `FRONTEND_ENV`?
-        last_vld_output_attr = SemiFoldedDataFlowFormat(t_1st_vld=0)
+        last_vld_output_attr = SemiFoldedStreamAttr(0, 1)
 
         for m in modules:
             # TODO for the case of the ResBlock, the `pred_dg_semi_ops` will be used.
             if isinstance(m, _SemiFoldedModule):
                 generated[m] = m.build(self, last_vld_output_attr, **build_options)
-                last_vld_output_attr = m.oflow_format
+                last_vld_output_attr = m.ostream_attr
             else:
                 generated[m] = m.build(self, **build_options)
 
diff --git a/paibox/utils.py b/paibox/utils.py
index 906933cf..6c43864b 100644
--- a/paibox/utils.py
+++ b/paibox/utils.py
@@ -155,33 +155,33 @@ def reverse_16bit(x: int) -> int:
     return ((x >> 8) | (x << 8)) & 0xFFFF
 
 
-def _get_desc(desc: Optional[str] = None) -> str:
-    return "value" if desc is None else desc
-
-
 def arg_check_pos(arg: int, desc: Optional[str] = None) -> int:
+    _desc = "value" if desc is None else f"{desc}"
     if arg < 1:
-        raise ValueError(f"{_get_desc(desc)} must be positive, but got {arg}.")
+        raise ValueError(f"{_desc} must be positive, but got {arg}.")
 
     return arg
 
 
 def arg_check_non_pos(arg: int, desc: Optional[str] = None) -> int:
+    _desc = "value" if desc is None else f"{desc}"
     if arg > 0:
-        raise ValueError(f"{_get_desc(desc)} must be non-positive, but got {arg}.")
+        raise ValueError(f"{_desc} must be non-positive, but got {arg}.")
 
     return arg
 
 
 def arg_check_neg(arg: int, desc: Optional[str] = None) -> int:
+    _desc = "value" if desc is None else f"{desc}"
     if arg > -1:
-        raise ValueError(f"{_get_desc(desc)} must be negative, but got {arg}.")
+        raise ValueError(f"{_desc} must be negative, but got {arg}.")
 
     return arg
 
 
 def arg_check_non_neg(arg: int, desc: Optional[str] = None) -> int:
+    _desc = "value" if desc is None else f"{desc}"
     if arg < 0:
-        raise ValueError(f"{_get_desc(desc)} must be non-negative, but got {arg}.")
+        raise ValueError(f"{_desc} must be non-negative, but got {arg}.")
 
     return arg
diff --git a/pyproject.toml b/pyproject.toml
index 08223a17..71124dab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,6 @@ homepage = "https://github.com/PAICookers/PAIBox"
 documentation = "https://github.com/PAICookers/PAIBox#readme"
 keywords = ["PAICORE 2.0", "PAIBox", "SNN", "Toolchain"]
 classifiers = [
-    "Development Status :: 4 - Beta",
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
     "Operating System :: OS Independent",
@@ -25,9 +24,9 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    "Topic :: Software Development :: Compilers",
+    "Topic :: Software Development :: Build Tools",
+    "Topic :: Software Development :: Libraries",
 ]
 packages = [{ include = "paibox" }]
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 4aae8961..c2c63a82 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -4,7 +4,6 @@
 import paibox as pb
 from paibox.base import DynamicSys
 from paibox.components import NeuModule
-from paibox.components._modules import _SemiFoldedModule
 from paibox.components.neuron.base import MetaNeuron
 from paibox.components.synapses.conv_utils import _conv2d_faster, _pair, _single
 from paibox.network import DynSysGroup
@@ -977,12 +976,12 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim1.add_probe(probe_linear)
 
-        semi_folded_modules: list[_SemiFoldedModule] = [*conv2d_list, linear]
+        semi_folded_modules = [*conv2d_list, linear]
         # The interval & the time o the first valid data of the external input data stream
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
@@ -1043,16 +1042,6 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                         ],
                     )
 
-                    assert conv2d_list[i_conv].tick_wait_start + t_1st_vld_data[
-                        i_conv
-                    ] + i * semi_vld_out_intv[i_conv] - 1 == conv2d_list[
-                        i_conv
-                    ].tick_wait_start + conv2d_list[
-                        i_conv
-                    ].oflow_format.t_at_idx(
-                        i
-                    )
-
             # x is the reference result of the last convolution.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
@@ -1060,13 +1049,9 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.oflow_format.t_last_vld
+                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
                 ],
             )
-            assert (
-                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
-                == linear.tick_wait_start + linear.oflow_format.t_last_vld
-            )
 
     @pytest.mark.parametrize(
         "ishape_chw, n_pool, kshape_hw, stride, padding, out_features, pool_type",
@@ -1186,12 +1171,12 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         probe_linear = pb.Probe(generated[linear][0], "output")
         sim1.add_probe(probe_linear)
 
-        semi_folded_modules: list[_SemiFoldedModule] = [*pool2d_list, linear]
+        semi_folded_modules = [*pool2d_list, linear]
         # The interval & the time o the first valid data of the external input data stream
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m.ostream_attr.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
@@ -1239,16 +1224,6 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                         ],
                     )
 
-                    assert pool2d_list[i_pool].tick_wait_start + t_1st_vld_data[
-                        i_pool
-                    ] + i * semi_vld_out_intv[i_pool] - 1 == pool2d_list[
-                        i_pool
-                    ].tick_wait_start + pool2d_list[
-                        i_pool
-                    ].oflow_format.t_at_idx(
-                        i
-                    )
-
             # x is the reference result of the last pooling.
             expected_fc_t = _ann_bit_trunc(x.ravel() @ fc_weight.astype(VOLTAGE_DTYPE))
 
@@ -1256,15 +1231,10 @@ def test_Pool2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.oflow_format.t_last_vld
+                    linear.tick_wait_start + linear.ostream_attr.t_last_vld
                 ],
             )
 
-            assert (
-                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
-                == linear.tick_wait_start + linear.oflow_format.t_last_vld
-            )
-
     @pytest.mark.parametrize(
         "shape, weight",
         [
diff --git a/tests/test_base.py b/tests/test_base.py
index efe9a2ef..5adb2c7f 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1,7 +1,7 @@
 import pytest
 
 import paibox as pb
-from paibox.base import DataFlowFormat, PAIBoxObject
+from paibox.base import PAIBoxObject
 from paibox.exceptions import RegisterError
 
 
@@ -41,20 +41,3 @@ def test_paiboxobject_nodes():
 
     nodes4 = obj1.nodes(method="absolute", level=-1, include_self=True)
     assert nodes4["obj111"] == obj1
-
-
-class TestDataFlowFormat:
-    def test_dff_infinite_dataflow(self):
-        with pytest.raises((AssertionError, ValueError)):
-            dff = DataFlowFormat(1, 0, -1)
-            _ = dff.t_last_vld
-
-    def test_dff_valid(self):
-        # 1. t1 >= tws, t_last > endtick
-        dff1 = DataFlowFormat(10, 3, 10, is_local_time=False)
-        with pytest.raises(ValueError):
-            dff1._check_after_assign(8, 36)
-
-        # 2. t1 >= tws, t_last <= endtick
-        dff2 = DataFlowFormat(10, 3, 10, is_local_time=True)
-        dff2._check_after_assign(2, 39)

From 5dd9a85471041319f4a266cfbcad2665ace53108 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Dec 2024 11:35:35 +0000
Subject: [PATCH 168/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/base.py                    | 2 +-
 paibox/components/modules.py      | 2 +-
 paibox/components/neuron/base.py  | 2 +-
 paibox/components/neuron/utils.py | 2 +-
 tests/test_base.py                | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/paibox/base.py b/paibox/base.py
index c47260ff..fbfe3959 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -1,5 +1,5 @@
-from dataclasses import dataclass
 import sys
+from dataclasses import dataclass
 from typing import Any, ClassVar, Literal, Optional
 
 import numpy as np
diff --git a/paibox/components/modules.py b/paibox/components/modules.py
index 2c13edef..703c1a00 100644
--- a/paibox/components/modules.py
+++ b/paibox/components/modules.py
@@ -14,7 +14,7 @@
 from paibox.types import NEUOUT_U8_DTYPE, NeuOutType, VoltageType
 from paibox.utils import check_elem_unique, shape2num
 
-from .neuron.utils import _input_width_format, RTModeKwds, _spike_width_format
+from .neuron.utils import RTModeKwds, _input_width_format, _spike_width_format
 from .projection import InputProj
 
 if sys.version_info >= (3, 10):
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index bef9976b..635ac3f6 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -41,10 +41,10 @@
 from .utils import (
     BIT_TRUNCATE_MAX,
     NEG_THRES_MIN,
+    RTModeKwds,
     _input_width_format,
     _leak_v_check,
     _mask,
-    RTModeKwds,
     _spike_width_format,
     vjt_overflow,
 )
diff --git a/paibox/components/neuron/utils.py b/paibox/components/neuron/utils.py
index dd58006f..349ea050 100644
--- a/paibox/components/neuron/utils.py
+++ b/paibox/components/neuron/utils.py
@@ -4,9 +4,9 @@
 import numpy as np
 from paicorelib import (
     InputWidthFormat,
+    MaxPoolingEnable,
     SNNModeEnable,
     SpikeWidthFormat,
-    MaxPoolingEnable,
 )
 from paicorelib.framelib.utils import _mask
 from paicorelib.ram_model import (
diff --git a/tests/test_base.py b/tests/test_base.py
index cd55346d..efe9a2ef 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1,7 +1,7 @@
 import pytest
 
 import paibox as pb
-from paibox.base import PAIBoxObject, DataFlowFormat
+from paibox.base import DataFlowFormat, PAIBoxObject
 from paibox.exceptions import RegisterError
 
 

From dae4c3ca06522dae22a2010993723b2c075210fb Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 3 Dec 2024 16:41:11 +0800
Subject: [PATCH 169/187] =?UTF-8?q?=F0=9F=9A=9A=20rename=20`oflow=5Fformat?=
 =?UTF-8?q?`=20to=20`=5Foflow=5Fformat`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/graphs.py            |  2 +-
 paibox/base.py                      |  2 +-
 paibox/components/_modules.py       |  2 +-
 paibox/components/functional.py     | 42 ++++++++++++++---------------
 paibox/components/neuron/base.py    | 26 +++++++++---------
 paibox/network.py                   |  2 +-
 tests/components/test_functional.py | 20 +++++++-------
 7 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/paibox/backend/graphs.py b/paibox/backend/graphs.py
index 69a117e7..e904d4ab 100644
--- a/paibox/backend/graphs.py
+++ b/paibox/backend/graphs.py
@@ -533,7 +533,7 @@ def _find_rg_by_cb(
     def inherent_timestep(self) -> int:
         self.build_check()
         return max(
-            n.oflow_format.get_global_t_1st_vld(n.tick_wait_start)
+            n._oflow_format.get_global_t_1st_vld(n.tick_wait_start)
             for n in self.onodes.values()
         )
 
diff --git a/paibox/base.py b/paibox/base.py
index fbfe3959..82740f00 100644
--- a/paibox/base.py
+++ b/paibox/base.py
@@ -338,7 +338,7 @@ class NeuDyn(DynamicSys, ReceiveInputProj, TimeRelatedNode):
     _uf: int
     """unrolling_factor"""
 
-    oflow_format: DataFlowFormat
+    _oflow_format: DataFlowFormat
     """The format of output data stream"""
 
     def __init__(self, name: Optional[str] = None) -> None:
diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 991a3f48..907f0a95 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -168,7 +168,7 @@ class _SemiFoldedModule(FunctionalModule):
     """Functional modules with interfaces in semi-folded form. Use `build()` of class `HasSemiFoldedIntf`."""
 
     inherent_delay = 1
-    oflow_format: SemiFoldedDataFlowFormat
+    _oflow_format: SemiFoldedDataFlowFormat
 
     def build(
         self,
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index be7897c9..ec0bb1c8 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -910,10 +910,10 @@ def build(
     ) -> BuiltComponentType:
         assert len(self.source[0].shape_out) == 2
         # For semi-folded linear, the valid output is at only one timestep.
-        self.oflow_format = SemiFoldedDataFlowFormat(
+        self._oflow_format = SemiFoldedDataFlowFormat(
             incoming_flow_format.t_last_vld, 1, 1
         )
-        twe = 1 + self.oflow_format.t_last_vld
+        twe = 1 + self._oflow_format.t_last_vld
 
         ich, ih = self.source[0].shape_out
 
@@ -932,9 +932,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_linear.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
+            self._oflow_format.t_1st_vld,
+            self._oflow_format.interval,
+            self._oflow_format.n_vld,
         )
 
         for i in range(ih):
@@ -1055,12 +1055,12 @@ def build(
         _, cin, _, kw = self.kernel.shape
         _, ow = self.shape_out
 
-        self.oflow_format = SemiFoldedDataFlowFormat(
+        self._oflow_format = SemiFoldedDataFlowFormat(
             incoming_flow_format.t_at_n(kw - self.padding[0]),
             incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.oflow_format.t_last_vld
+        twe = 1 + self._oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
             self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
@@ -1082,9 +1082,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_conv2d.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
+            self._oflow_format.t_1st_vld,
+            self._oflow_format.interval,
+            self._oflow_format.n_vld,
         )
 
         for i in range(kw):
@@ -1224,12 +1224,12 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.oflow_format = SemiFoldedDataFlowFormat(
+        self._oflow_format = SemiFoldedDataFlowFormat(
             incoming_flow_format.t_at_n(kw),
             incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.oflow_format.t_last_vld
+        twe = 1 + self._oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
             self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
@@ -1247,9 +1247,9 @@ def build(
             name=f"nd_{self.name}",
         )
         n_pool2d.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
+            self._oflow_format.t_1st_vld,
+            self._oflow_format.interval,
+            self._oflow_format.n_vld,
         )
 
         for i in range(kw):
@@ -1349,12 +1349,12 @@ def build(
         kh, kw = self.kernel_size
         _, ow = self.shape_out
 
-        self.oflow_format = SemiFoldedDataFlowFormat(
+        self._oflow_format = SemiFoldedDataFlowFormat(
             incoming_flow_format.t_at_n(kw - self.padding[0]),
             incoming_flow_format.interval * self.stride[1],
             ow,
         )
-        twe = 1 + self.oflow_format.t_last_vld
+        twe = 1 + self._oflow_format.t_last_vld
 
         if build_options.get("check_before_compile"):
             self._input_buffer_len_check(cin, ih, kw, incoming_flow_format.interval)
@@ -1380,16 +1380,16 @@ def build(
         n_pool2d = ANNNeuron(
             self.shape_out,
             delay=self.delay_relative,
-            bit_trunc=bit_trunc,
+            bit_trunc=bt,
             tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=twe,
             keep_shape=self.keep_shape,
             name=f"nd_{self.name}",
         )
         n_pool2d.set_oflow_format(
-            self.oflow_format.t_1st_vld,
-            self.oflow_format.interval,
-            self.oflow_format.n_vld,
+            self._oflow_format.t_1st_vld,
+            self._oflow_format.interval,
+            self._oflow_format.n_vld,
         )
 
         for i in range(kw):
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 635ac3f6..1b96ffa7 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -493,7 +493,7 @@ def __init__(
         self._twe = arg_check_non_neg(tick_wait_end, "'tick_wait_end'")
         self._uf = arg_check_pos(unrolling_factor, "'unrolling_factor'")
         # Default dataflow is infinite and continuous, starting at tws+0.
-        self.oflow_format = DataFlowFormat(0, is_local_time=True)
+        self._oflow_format = DataFlowFormat(0, is_local_time=True)
 
     def __len__(self) -> int:
         return self._n_neuron
@@ -540,19 +540,21 @@ def set_oflow_format(
         format_type: type[DataFlowFormat] = DataFlowFormat,
     ) -> None:
         """Set the attributes of output dataflow format by given arguments."""
-        if hasattr(self, "oflow_format"):
+        if hasattr(self, "_oflow_format"):
             _t_1st_vld = (
-                t_1st_vld if isinstance(t_1st_vld, int) else self.oflow_format.t_1st_vld
+                t_1st_vld
+                if isinstance(t_1st_vld, int)
+                else self._oflow_format.t_1st_vld
             )
             _interval = (
                 arg_check_pos(interval, "interval")
                 if isinstance(interval, int)
-                else self.oflow_format.interval
+                else self._oflow_format.interval
             )
             _n_vld = (
                 arg_check_non_neg(n_vld, "n_vld")
                 if isinstance(n_vld, int)
-                else self.oflow_format.n_vld
+                else self._oflow_format.n_vld
             )
             self._assign_flow_format(_t_1st_vld, _interval, _n_vld)
         else:
@@ -562,17 +564,17 @@ def set_oflow_format(
                 and isinstance(t_1st_vld, int)
             ):
                 raise ValueError(
-                    "if 'oflow_format' is not set, 't_1st_vld', 'interval' & 'n_vld' must be set."
+                    "if '_oflow_format' is not set, 't_1st_vld', 'interval' & 'n_vld' must be set."
                 )
 
-            self.oflow_format = format_type(t_1st_vld, interval, n_vld)
-            self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
+            self._oflow_format = format_type(t_1st_vld, interval, n_vld)
+            self._oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
 
     def _assign_flow_format(self, t_1st_vld: int, intv: int, n_vld: int) -> None:
-        self.oflow_format.t_1st_vld = t_1st_vld
-        self.oflow_format.interval = intv
-        self.oflow_format.n_vld = n_vld
-        self.oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
+        self._oflow_format.t_1st_vld = t_1st_vld
+        self._oflow_format.interval = intv
+        self._oflow_format.n_vld = n_vld
+        self._oflow_format._check_after_assign(self.tick_wait_start, self.end_tick)
 
     def __copy__(self) -> "Neuron":
         """Same as `__deepcopy__`."""
diff --git a/paibox/network.py b/paibox/network.py
index 6db83afd..4f6dc190 100644
--- a/paibox/network.py
+++ b/paibox/network.py
@@ -113,7 +113,7 @@ def build_modules(
             # TODO for the case of the ResBlock, the `pred_dg_semi_ops` will be used.
             if isinstance(m, _SemiFoldedModule):
                 generated[m] = m.build(self, last_vld_output_attr, **build_options)
-                last_vld_output_attr = m.oflow_format
+                last_vld_output_attr = m._oflow_format
             else:
                 generated[m] = m.build(self, **build_options)
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 4aae8961..a738b460 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -982,7 +982,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m._oflow_format.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_conv
         for i in range(n_conv):
             if i == 0:
@@ -1049,7 +1049,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                         i_conv
                     ].tick_wait_start + conv2d_list[
                         i_conv
-                    ].oflow_format.t_at_idx(
+                    ]._oflow_format.t_at_idx(
                         i
                     )
 
@@ -1060,12 +1060,12 @@ def test_Conv2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.oflow_format.t_last_vld
+                    linear.tick_wait_start + linear._oflow_format.t_last_vld
                 ],
             )
             assert (
-                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
-                == linear.tick_wait_start + linear.oflow_format.t_last_vld
+                linear._oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
+                == linear.tick_wait_start + linear._oflow_format.t_last_vld
             )
 
     @pytest.mark.parametrize(
@@ -1191,7 +1191,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
         semi_vld_out_intv0 = 1
         t_1st_vld_data0 = 0
         # The interval & the time of the first valid data of the current layers
-        semi_vld_out_intv = [m.oflow_format.interval for m in semi_folded_modules]
+        semi_vld_out_intv = [m._oflow_format.interval for m in semi_folded_modules]
         t_1st_vld_data = [0] * n_pool
         for i in range(n_pool):
             if i == 0:
@@ -1245,7 +1245,7 @@ def test_Pool2dSemiFolded_FC_ChainNet(
                         i_pool
                     ].tick_wait_start + pool2d_list[
                         i_pool
-                    ].oflow_format.t_at_idx(
+                    ]._oflow_format.t_at_idx(
                         i
                     )
 
@@ -1256,13 +1256,13 @@ def test_Pool2dSemiFolded_FC_ChainNet(
             assert np.array_equal(
                 expected_fc_t,
                 sim1.data[probe_linear][
-                    linear.tick_wait_start + linear.oflow_format.t_last_vld
+                    linear.tick_wait_start + linear._oflow_format.t_last_vld
                 ],
             )
 
             assert (
-                linear.oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
-                == linear.tick_wait_start + linear.oflow_format.t_last_vld
+                linear._oflow_format.get_global_t_1st_vld(linear.tick_wait_start)
+                == linear.tick_wait_start + linear._oflow_format.t_last_vld
             )
 
     @pytest.mark.parametrize(

From ec1a83d37dc091d434e367b375f8089887f7424b Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Tue, 3 Dec 2024 16:43:05 +0800
Subject: [PATCH 170/187] =?UTF-8?q?=F0=9F=97=91=EF=B8=8F=20remove=20bias?=
 =?UTF-8?q?=20check=20in=20semi-folded=20ops.=20Add=20`bit=5Ftrunc`=20for?=
 =?UTF-8?q?=20semi-folded=20pooling=20ops?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/_modules.py    | 25 +++++++++----------------
 paibox/components/functional.py  | 27 ++++++++++++++-------------
 paibox/components/neuron/base.py |  2 +-
 3 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/paibox/components/_modules.py b/paibox/components/_modules.py
index 907f0a95..b7798402 100644
--- a/paibox/components/_modules.py
+++ b/paibox/components/_modules.py
@@ -1,3 +1,4 @@
+import math
 import typing
 from typing import Literal, Optional, Union
 
@@ -5,9 +6,8 @@
 from paicorelib import TM, HwConfig
 
 from paibox.base import DataFlowFormat, NeuDyn, NodeList
-from paibox.exceptions import ResourceError, ShapeError
+from paibox.exceptions import ResourceError
 from paibox.types import (
-    LEAK_V_DTYPE,
     NEUOUT_U8_DTYPE,
     WEIGHT_DTYPE,
     DataType,
@@ -216,26 +216,19 @@ def __init__(
             neuron_s: the input neuron.
             out_features: the output shape.
             weights: the weight matrix.
-            bias: It can be a scalar or an array of the same size as the output.
+            bias: it can be a scalar or an array of the same size as the output.
             bit_trunc: the bit truncation position. By default, bits 7 to 0 are truncated.
         """
         self.weights = weights
         self.bit_trunc = bit_trunc
-        _shape_out = as_shape(out_features)
-
-        if isinstance(bias, np.ndarray):
-            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
-            if _bias.shape != _shape_out:
-                raise ShapeError(
-                    f"the shape of bias {_bias.shape} does not match the shape of output {_shape_out}."
-                )
-        else:
-            _bias = int(bias)
-
-        self.bias = _bias
+        self.bias = bias
 
         super().__init__(
-            neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
+            neuron_s,
+            shape_out=as_shape(out_features),
+            keep_shape=keep_shape,
+            name=name,
+            **kwargs,
         )
 
 
diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index ec0bb1c8..92d31f92 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -996,7 +996,7 @@ def __init__(
             kernel: convolution kernel in (O,I,H,W) order.
             stride: the step size of the kernel sliding. It can be a scalar or a tuple of 2 integers.
             padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2 integers.
-            bias: It can be a scalar or an array of the same size as the output.
+            bias: it can be a scalar or an array of the same size as the output.
             bit_trunc: the bit truncation position. By default, bits 7 to 0 are truncated.
         """
         if kernel.ndim != self._spatial_ndim + 2:
@@ -1023,17 +1023,7 @@ def __init__(
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
         _shape_out = (cout, out_h)
-
-        if isinstance(bias, np.ndarray):
-            _bias = np.atleast_1d(bias).astype(LEAK_V_DTYPE)
-            if _bias.shape != _shape_out:
-                raise ShapeError(
-                    f"the shape of bias {_bias.shape} does not match the shape of output {_shape_out}."
-                )
-        else:
-            _bias = int(bias)
-
-        self.bias = _bias
+        self.bias = bias
 
         super().__init__(
             neuron_s, shape_out=_shape_out, keep_shape=keep_shape, name=name, **kwargs
@@ -1173,6 +1163,7 @@ def __init__(
         neuron_s: Union[NeuDyn, InputProj],
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
+        bit_trunc: int = 8,
         *,
         keep_shape: bool = False,
         name: Optional[str] = None,
@@ -1184,6 +1175,7 @@ def __init__(
             neuron_s: the input neuron to be pooled.
             kernel_size: the size of the window to take a max over.
             stride: the stride of the window. Default value is `kernel_size`.
+            bit_trunc: the bit truncation position. By default, bits 7 to 0 are truncated.
 
         NOTE: Since the semi-folded max pooling in the ANN mode is implemented using comparators, it is not \
             possible to use negative padding layer to eliminate the incorrect results of the padding part.
@@ -1195,6 +1187,7 @@ def __init__(
             _stride = _pair(stride)
 
         self.stride = _stride
+        self.bit_trunc = bit_trunc
 
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
@@ -1239,6 +1232,7 @@ def build(
 
         n_pool2d = ANNNeuron(
             self.shape_out,
+            bit_trunc=self.bit_trunc,
             delay=self.delay_relative,
             tick_wait_start=self.tick_wait_start + 1,
             tick_wait_end=twe,
@@ -1296,6 +1290,7 @@ def __init__(
         kernel_size: _Size2Type,
         stride: Optional[_Size2Type] = None,
         padding: _Size2Type = 0,
+        bit_trunc: Optional[int] = None,
         *,
         keep_shape: bool = False,
         name: Optional[str] = None,
@@ -1309,6 +1304,7 @@ def __init__(
             stride: the stride of the window. Default value is `kernel_size`.
             padding: the amount of zero-padding applied to the input. It can be a scalar or a tuple of 2    \
                 integers.
+            bit_trunc: the bit truncation position. By default, bit_trunc = 8 + ksize.bit_length() - 1.
         """
         self.kernel_size = _pair(kernel_size)
         if stride is None:
@@ -1318,6 +1314,7 @@ def __init__(
 
         self.stride = _stride
         self.padding = _pair(padding)
+        self.bit_trunc = bit_trunc
 
         assert len(neuron_s.shape_out) == 2
         in_ch, in_h = neuron_s.shape_out
@@ -1370,7 +1367,11 @@ def build(
         # 3. The alternative is bit_tunc=16 for this layer & w*16/9 for the next layer?
         # NOTE: The resulting linear transformation of weights of the next layer needs to be considered
         # during quantization.
-        bit_trunc = 8 + (kh * kw).bit_length() - 1
+        bt = (
+            self.bit_trunc
+            if isinstance(self.bit_trunc, int)
+            else 8 + (kh * kw).bit_length() - 1
+        )
 
         n_delays = NodeList()
         n_neg_padding = NodeList()
diff --git a/paibox/components/neuron/base.py b/paibox/components/neuron/base.py
index 1b96ffa7..8498c699 100644
--- a/paibox/components/neuron/base.py
+++ b/paibox/components/neuron/base.py
@@ -160,7 +160,7 @@ def __init__(
 
         if bit_truncation > BIT_TRUNCATE_MAX:
             raise ValueError(
-                f"'bit_truncation' should be less than or equal to {BIT_TRUNCATE_MAX}."
+                f"'bit_truncation' should be less than or equal to {BIT_TRUNCATE_MAX}, but got {bit_truncation}."
             )
 
     def _neuronal_charge(

From 5d2dfdd8a820dbee1ab76407b1c7135d201b9a49 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Wed, 4 Dec 2024 10:19:36 +0800
Subject: [PATCH 171/187] =?UTF-8?q?=F0=9F=A4=96=20fix=20workflows?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/codecov.yml | 3 +++
 .pre-commit-config.yaml       | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index 35d36c57..1029dada 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -24,6 +24,9 @@ jobs:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         os: [ubuntu-latest, windows-latest]
+        exclude: # see https://github.com/python/cpython/issues/125842
+          - python-version: "3.13"
+            os: windows-latest
     runs-on: ${{ matrix.os }}
 
     steps:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b5b4b101..bd6e1764 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,6 @@ repos:
       - id: check-symlinks
       - id: check-merge-conflict
       - id: mixed-line-ending
-        args: [--pytest-test-first]
       - id: requirements-txt-fixer
       - id: pretty-format-json
         args: [--autofix, --indent 2]

From 9be2243eb4578eb5e1a548b7a4bf4a3f3b03f19a Mon Sep 17 00:00:00 2001
From: yang1556 <2689162957@qq.com>
Date: Tue, 3 Dec 2024 16:58:17 +0800
Subject: [PATCH 172/187] support grouped conv

---
 paibox/components/functional.py          |  20 +++--
 paibox/components/synapses/base.py       |   5 +-
 paibox/components/synapses/conv_utils.py | 107 +++++++++++++++++------
 paibox/components/synapses/transforms.py |  19 ++--
 4 files changed, 111 insertions(+), 40 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 92d31f92..98d690e0 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -983,6 +983,7 @@ def __init__(
         stride: _Size2Type = 1,
         padding: _Size2Type = 0,
         bias: DataType = 0,
+        groups: int = 1,
         bit_trunc: int = 8,
         *,
         keep_shape: bool = False,
@@ -1007,6 +1008,7 @@ def __init__(
         self.kernel = kernel
         self.stride = _pair(stride)
         self.padding = _pair(padding)
+        self.groups = groups
         self.bit_trunc = bit_trunc
 
         assert len(neuron_s.shape_out) == 2
@@ -1019,7 +1021,11 @@ def __init__(
 
         assert self.padding[0] < kh and self.padding[1] < kw
 
-        if in_ch != cin:
+        if in_ch % groups != 0:
+            raise ValueError('in_channels must be divisible by groups')
+        if cout % groups != 0:
+            raise ValueError('out_channels must be divisible by groups')
+        if in_ch != groups * cin:
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
         _shape_out = (cout, out_h)
@@ -1041,7 +1047,7 @@ def build(
         #         self.source[0].shape_out, "CHW"
         #     )
         #     self.source[0].shape_change((in_ch, in_h))
-        _, ih = self.source[0].shape_out
+        ic, ih = self.source[0].shape_out
         _, cin, _, kw = self.kernel.shape
         _, ow = self.shape_out
 
@@ -1079,7 +1085,7 @@ def build(
 
         for i in range(kw):
             neuron = ANNBypassNeuron(
-                (cin, ih),
+                (ic, ih),
                 delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
                 tick_wait_end=twe - incoming_flow_format.interval * i,
@@ -1090,7 +1096,7 @@ def build(
             syn1 = FullConnSyn(
                 self.source[0],
                 neuron,
-                weights=_delay_mapping_mask(ih, cin),
+                weights=_delay_mapping_mask(ih, ic),
                 conn_type=ConnType.All2All,
                 name=f"s{i}_delay_{self.name}",
             )
@@ -1102,6 +1108,7 @@ def build(
                 self.kernel[:, :, :, kw - i - 1],
                 self.stride,
                 self.padding,
+                self.groups,
                 "OIL",
                 name=f"s{i}_{self.name}",
             )
@@ -1113,7 +1120,7 @@ def build(
         if incoming_flow_format.t_1st_vld > 0:
             for p in range(self.padding[0]):
                 neuron = ANNBypassNeuron(
-                    (cin, ih),
+                    (ic, ih),
                     delay=1 + incoming_flow_format.interval * (kw - 1 - p),
                     tick_wait_start=self.tick_wait_start,
                     tick_wait_end=incoming_flow_format.t_1st_vld,
@@ -1125,7 +1132,7 @@ def build(
                 syn1 = FullConnSyn(
                     self.source[0],
                     neuron,
-                    weights=_delay_mapping_mask(ih, cin),
+                    weights=_delay_mapping_mask(ih, ic),
                     conn_type=ConnType.All2All,
                     name=f"s{p}_pad_{self.name}",
                 )
@@ -1137,6 +1144,7 @@ def build(
                     -(self.kernel[:, :, :, p]),
                     self.stride,
                     self.padding,
+                    self.groups,
                     "OIL",
                     name=f"neg_s{p}_{self.name}",
                 )
diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 01fd912f..edea85e7 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -345,6 +345,7 @@ def __init__(
         kernel: np.ndarray,
         stride: tuple[int, int],
         padding: tuple[int, int],
+        groups: int,
         order: _KOrder3d,
         name: Optional[str] = None,
     ) -> None:
@@ -367,7 +368,7 @@ def __init__(
         in_ch, in_h = source.shape_out
         out_h = (in_h + 2 * padding[0] - kernel_h) // stride[0] + 1
 
-        if in_ch != in_channels:
+        if in_ch != groups * in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
         if (_output_size := out_channels * out_h) != dest.num_in:
@@ -377,7 +378,7 @@ def __init__(
             )
 
         self.comm = Conv2dSemiFoldedForward(
-            (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding
+            (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding, groups
         )
 
 
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index fd3752d9..d8b541fc 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -189,28 +189,47 @@ def _conv2d_semifolded_unroll(
     kernel: WeightType,
     stride: Size2Type,
     padding: Size2Type,
+    groups: int,
 ) -> WeightType:
-    cout, cin, kh = kernel.shape
+    cout, ck, kh = kernel.shape
+    cin = groups * ck
     ih = in_shape[1] + 2 * padding[0]
     _, oh = out_shape
     w_np = np.zeros((cin * in_shape[1], cout * oh), dtype=kernel.dtype)
-
-    for i in range(cout):
-        for j in range(cin):
-            # Must recreate `w_block` every time because some rows will be deleted.
-            w_block = np.zeros((ih, oh), dtype=kernel.dtype)
-            for k in range(oh):
-                w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[i, j, :]
-
-            if padding[0] > 0:  # H direction
-                w_block = np.delete(
-                    w_block,
-                    np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
-                    axis=0,
+    for g in range(groups):
+        for i in range(cout//groups):
+            for j in range(ck):
+                # Must recreate `w_block` every time because some rows will be deleted.
+                w_block = np.zeros((ih, oh), dtype=kernel.dtype)
+                for k in range(oh):
+                    w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[g*cout//groups+i, j, :]
+                if padding[0] > 0:  # H direction
+                    w_block = np.delete(
+                        w_block,
+                        np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
+                        axis=0,
+                    )
+                w_np[g*ck*in_shape[1] + j * in_shape[1] : g*ck*in_shape[1]+(j + 1) * in_shape[1], g*oh*cout//groups+i * oh :g*oh*cout//groups+(i + 1) * oh] = (
+                    w_block
                 )
-            w_np[j * in_shape[1] : (j + 1) * in_shape[1], i * oh : (i + 1) * oh] = (
-                w_block
-            )
+
+
+    # for i in range(cout):
+    #     for j in range(cin):
+    #         # Must recreate `w_block` every time because some rows will be deleted.
+    #         w_block = np.zeros((ih, oh), dtype=kernel.dtype)
+    #         for k in range(oh):
+    #             w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[i, j, :]
+
+    #         if padding[0] > 0:  # H direction
+    #             w_block = np.delete(
+    #                 w_block,
+    #                 np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
+    #                 axis=0,
+    #             )
+    #         w_np[j * in_shape[1] : (j + 1) * in_shape[1], i * oh : (i + 1) * oh] = (
+    #             w_block
+    #         )
 
     return w_np
 
@@ -252,29 +271,63 @@ def _conv2d_faster(
     kernel: WeightType,
     stride: Size2Type,
     padding: Size2Type,
+    groups: int = 1,
     # fm_order: str,
 ) -> SynOutType:
     """Faster 2d convolution."""
-    cout, _, kh, kw = kernel.shape  # (O, I, H, W)
+    cout, cin, kh, kw = kernel.shape  # (O, I, H, W)
+    if cout % groups != 0:
+        raise ValueError("Output channels must be divisible by groups.")
+
+        # 计算每个组的通道数
+    cin_per_group = cin
+    cout_per_group = cout // groups
 
+    # 将输入张量进行填充
     x_padded = np.pad(
         x_chw,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
     )
 
-    # kernel: (cout, cin, kh, kw) -> (cout, cin*kh*kw)
-    col_kernel = kernel.reshape(cout, -1)
+    # 用于存储最终输出
+    out = np.zeros((cout, *out_shape), dtype=np.int64)
 
-    # padded: (cin, xh+2*p[0]-kh, xw+2*p[1]-kw) -> (oh*ow, cin*kh*kw)
-    col_fm = _2d_im2col(x_padded, out_shape[0], out_shape[1], kh, kw, stride)
-    # out = np.zeros((cout,) + out_shape, dtype=np.int64)
-    # (oh*ow, cin*kh*kw) * (cout, cin*kh*kw)^T = (oh*ow, cout)
-    out = col_fm @ col_kernel.T  # + self.bias
-    # (oh*ow, cout) -> (cout, oh*ow) -> (cout, oh, ow)
-    out = out.T.reshape((cout,) + out_shape)
+    for g in range(groups):
+        # 获取当前组的输入和卷积核
+        x_group = x_padded[g * cin_per_group:(g + 1) * cin_per_group, :, :]
+        kernel_group = kernel[g * cout_per_group:(g + 1) * cout_per_group, :, :, :]
+
+        # 重塑卷积核以进行矩阵乘法
+        col_kernel = kernel_group.reshape(cout_per_group, -1)
+
+        # 转换当前组的填充图像为列格式
+        col_fm = _2d_im2col(x_group, out_shape[0], out_shape[1], kh, kw, stride)
+
+        # 进行矩阵乘法
+        out_group = col_fm @ col_kernel.T
 
+        # 将组输出重塑并合并到最终输出中
+        out[g * cout_per_group:(g + 1) * cout_per_group, :] = out_group.T.reshape((cout_per_group, *out_shape))
     return out.astype(VOLTAGE_DTYPE)
 
+    # x_padded = np.pad(
+    #     x_chw,
+    #     ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
+    # )
+
+    # # kernel: (cout, cin, kh, kw) -> (cout, cin*kh*kw)
+    # col_kernel = kernel.reshape(cout, -1)
+
+    # # padded: (cin, xh+2*p[0]-kh, xw+2*p[1]-kw) -> (oh*ow, cin*kh*kw)
+    # col_fm = _2d_im2col(x_padded, out_shape[0], out_shape[1], kh, kw, stride)
+    # # out = np.zeros((cout,) + out_shape, dtype=np.int64)
+    # # (oh*ow, cin*kh*kw) * (cout, cin*kh*kw)^T = (oh*ow, cout)
+    # out = col_fm @ col_kernel.T  # + self.bias
+    # # (oh*ow, cout) -> (cout, oh*ow) -> (cout, oh, ow)
+    # out = out.T.reshape((cout,) + out_shape)
+
+    # return out.astype(VOLTAGE_DTYPE)
+
 
 def _convtranspose1d_unroll(
     in_shape: Size1Type,
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index d8448fef..721012df 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -399,10 +399,19 @@ def connectivity(self):
 
 
 class Conv2dSemiFoldedForward(_ConvNdForward):
-    in_shape: Size2Type
-    out_shape: Size2Type
-    stride: Size2Type
-    padding: Size2Type
+    def __init__(
+        self, 
+        in_shape: SizeAnyType,
+        out_shape: SizeAnyType,
+        kernel: np.ndarray,
+        stride: _SizeAnyType = 0,
+        padding: _SizeAnyType = 0,
+        groups: int = 1,
+        output_padding: _SizeAnyType = 0,
+        ) -> None:
+        self.groups = groups
+        super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
+
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         return x @ self.connectivity
@@ -410,7 +419,7 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
     @property
     def connectivity(self):
         return _conv2d_semifolded_unroll(
-            self.in_shape, self.out_shape, self.weights, self.stride, self.padding
+            self.in_shape, self.out_shape, self.weights, self.stride, self.padding, self.groups
         )
 
 

From 5a67041b7a6c3aba94a41ccda0381e371520d3f7 Mon Sep 17 00:00:00 2001
From: yang1556 <2689162957@qq.com>
Date: Tue, 3 Dec 2024 16:58:28 +0800
Subject: [PATCH 173/187] add some group tests

---
 tests/components/test_functional.py | 74 +++++++++++++++++++++--------
 tests/shared_networks.py            |  5 +-
 2 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index a738b460..4e4d2765 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -831,20 +831,35 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         mapper.export(fp=ensure_dump_dir)
 
     @pytest.mark.parametrize(
-        "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features",
+        "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features, groups",
         [
             # n_conv = 1
-            ((3, 11, 11), 1, [(1, 3, 3, 3)], [1], [1], (10,)),
-            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], [2], (10,)),
-            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], [2], (10,)),
-            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], [1], (10,)),
-            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], [0], (4, 2)),
-            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], [0], 10),
-            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], [0], (3, 3)),
-            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], [0], (10,)),
-            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10),
+            ((3, 11, 11), 1, [(1, 3, 3, 3)], [1], [1], (10,), [1,]),
+            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], [2], (10,), [1,]), 
+            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], [2], (10,), [1,]),
+            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], [1], (10,), [1,]),
+            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], [0], (4, 2), [1,]),
+            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], [0], 10, [1,]),
+            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], [0], (3, 3), [1,]),
+            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], [0], (10,), [1,]),
+            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10, [1,]),
+            # group
+            ((8, 32, 32), 1, [(4, 4, 3, 3)], [2], [0], 10, [2,]),
+            ((8, 32, 32), 1, [(8, 1, 3, 3)], [2], [0], 10, [8,]),
             # n_conv = 2
-            ((1, 5, 5), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10),
+            ## group
+            ((4, 5, 5), 2, [(8, 1, 3, 3), (8, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10, [4, 8]),
+            (
+                (4, 32, 32),
+                2,
+                [(8, 2, 3, 3), (12, 4, 4, 4)],
+                [(2, 2), (2, 2)],
+                [1, 1],
+                10,
+                [2, 2]
+            ),
+            ##
+            ((1, 5, 5), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10, [1, 1]),
             (
                 (4, 32, 32),
                 2,
@@ -852,6 +867,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [(2, 2), (2, 2)],
                 [1, 1],
                 10,
+                [1, 1]
             ),
             (
                 (4, 32, 32),
@@ -860,13 +876,25 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [(2, 2), (1, 1)],
                 [1, 2],
                 10,
+                [1, 1]
             ),
-            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
-            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [2, 2], 10),
-            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], [2, 2], 10),
-            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], [2, 1], 10),
-            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10),
+            ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10, [1, 1]),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [2, 2], 10, [1, 1]),
+            ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [2, 2], [2, 2], 10, [1, 1]),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 4, 4)], [1, 2], [2, 1], 10, [1, 1]),
+            ((1, 24, 24), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10, [1, 1]),
             # n_conv = 3
+            ## group
+            (
+                (4, 32, 32),
+                3,
+                [(8, 1, 3, 3), (8, 1, 3, 3), (4, 2, 2, 2)],
+                [1, 1, 1],
+                [1, 1, 1],
+                3,
+                [4, 8, 4]
+            ),
+            ##
             (
                 (4, 32, 32),
                 3,
@@ -874,6 +902,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 1, 1],
                 [1, 1, 1],
                 3,
+                [1, 1, 1]
             ),
             (
                 (3, 32, 32),
@@ -882,6 +911,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 1, 1],
                 [1, 0, 1],
                 10,
+                [1, 1, 1]
             ),
             (
                 (1, 224, 224),
@@ -890,6 +920,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [2, 2, 2],
                 [3, 2, 1],
                 10,
+                [1, 1, 1]
             ),
             (
                 (3, 32, 32),
@@ -898,6 +929,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 2, 1],
                 [1, 0, 1],
                 10,
+                [1, 1, 1]
             ),
             # n_conv = 5
             (
@@ -907,7 +939,8 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 2, 1, 2, 1],
                 [1, 0, 1, 0, 1],
                 10,
-            ),
+                [1, 1, 1, 1, 1]
+            ),    
         ],
     )
     def test_Conv2dSemiFolded_FC_ChainNet(
@@ -918,14 +951,14 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         stride,
         padding,
         out_features,
+        groups,
         fixed_rng: np.random.Generator,
     ):
         """Test the network with N semi-folded conv2d + 1 semi-folded linear."""
         from tests.shared_networks import Conv2dSemiFolded_FC_ChainNetN
 
         assert n_conv == len(kshape_oihw) == len(stride)
-        assert ishape_chw[0] == kshape_oihw[0][1]
-
+        assert ishape_chw[0] == groups[0]*kshape_oihw[0][1]
         kernels = []
         strides = []
         paddings = []
@@ -960,7 +993,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         )
 
         net1 = Conv2dSemiFolded_FC_ChainNetN(
-            ishape_chw[:2], kernels, strides, paddings, out_features, fc_weight
+            ishape_chw[:2], kernels, strides, paddings, out_features, fc_weight, groups
         )
         # `net1.conv_list` will be removed in `build_fmodule`
         conv2d_list = net1.conv_list.copy()
@@ -1028,6 +1061,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
                         kernels[i_conv],
                         strides[i_conv],
                         paddings[i_conv],
+                        groups[i_conv],
                     )
                 )
 
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 6a0d5d8e..992f2511 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -255,13 +255,13 @@ def __init__(self, shape, axes):
 
 
 class Conv2dSemiFolded_FC_ChainNetN(pb.DynSysGroup):
-    def __init__(self, shape, kernels, strides, paddings, out_features, weight):
+    def __init__(self, shape, kernels, strides, paddings, out_features, weight, groups):
         super().__init__()
 
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
         self.conv_list = NodeList()
 
-        for i, (kernel, stride, padding) in enumerate(zip(kernels, strides, paddings)):
+        for i, (kernel, stride, padding, g) in enumerate(zip(kernels, strides, paddings, groups)):
             self.conv_list.append(
                 pb.Conv2dSemiFolded(
                     self.conv_list[-1] if i > 0 else self.i1,
@@ -269,6 +269,7 @@ def __init__(self, shape, kernels, strides, paddings, out_features, weight):
                     stride,
                     padding,
                     tick_wait_start=1 + 2 * i,
+                    groups=g,
                 )
             )
 

From bedb3b5777f2869e1e08b29b5d58b75ac208dfd6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:29:15 +0000
Subject: [PATCH 174/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/functional.py          |   4 +-
 paibox/components/synapses/conv_utils.py |  30 ++--
 paibox/components/synapses/transforms.py |  12 +-
 tests/components/test_functional.py      | 174 +++++++++++++++++++----
 tests/shared_networks.py                 |   4 +-
 5 files changed, 183 insertions(+), 41 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index 98d690e0..a6e33f94 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -1022,9 +1022,9 @@ def __init__(
         assert self.padding[0] < kh and self.padding[1] < kw
 
         if in_ch % groups != 0:
-            raise ValueError('in_channels must be divisible by groups')
+            raise ValueError("in_channels must be divisible by groups")
         if cout % groups != 0:
-            raise ValueError('out_channels must be divisible by groups')
+            raise ValueError("out_channels must be divisible by groups")
         if in_ch != groups * cin:
             raise ShapeError(f"the channels mismatch: {in_ch} != {cin}.")
 
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index d8b541fc..0c72cd0f 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -197,22 +197,30 @@ def _conv2d_semifolded_unroll(
     _, oh = out_shape
     w_np = np.zeros((cin * in_shape[1], cout * oh), dtype=kernel.dtype)
     for g in range(groups):
-        for i in range(cout//groups):
+        for i in range(cout // groups):
             for j in range(ck):
                 # Must recreate `w_block` every time because some rows will be deleted.
                 w_block = np.zeros((ih, oh), dtype=kernel.dtype)
                 for k in range(oh):
-                    w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[g*cout//groups+i, j, :]
+                    w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[
+                        g * cout // groups + i, j, :
+                    ]
                 if padding[0] > 0:  # H direction
                     w_block = np.delete(
                         w_block,
-                        np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
+                        np.hstack(
+                            (np.arange(padding[0]), np.arange(ih - padding[0], ih))
+                        ),
                         axis=0,
                     )
-                w_np[g*ck*in_shape[1] + j * in_shape[1] : g*ck*in_shape[1]+(j + 1) * in_shape[1], g*oh*cout//groups+i * oh :g*oh*cout//groups+(i + 1) * oh] = (
-                    w_block
-                )
-
+                w_np[
+                    g * ck * in_shape[1]
+                    + j * in_shape[1] : g * ck * in_shape[1]
+                    + (j + 1) * in_shape[1],
+                    g * oh * cout // groups
+                    + i * oh : g * oh * cout // groups
+                    + (i + 1) * oh,
+                ] = w_block
 
     # for i in range(cout):
     #     for j in range(cin):
@@ -294,8 +302,8 @@ def _conv2d_faster(
 
     for g in range(groups):
         # 获取当前组的输入和卷积核
-        x_group = x_padded[g * cin_per_group:(g + 1) * cin_per_group, :, :]
-        kernel_group = kernel[g * cout_per_group:(g + 1) * cout_per_group, :, :, :]
+        x_group = x_padded[g * cin_per_group : (g + 1) * cin_per_group, :, :]
+        kernel_group = kernel[g * cout_per_group : (g + 1) * cout_per_group, :, :, :]
 
         # 重塑卷积核以进行矩阵乘法
         col_kernel = kernel_group.reshape(cout_per_group, -1)
@@ -307,7 +315,9 @@ def _conv2d_faster(
         out_group = col_fm @ col_kernel.T
 
         # 将组输出重塑并合并到最终输出中
-        out[g * cout_per_group:(g + 1) * cout_per_group, :] = out_group.T.reshape((cout_per_group, *out_shape))
+        out[g * cout_per_group : (g + 1) * cout_per_group, :] = out_group.T.reshape(
+            (cout_per_group, *out_shape)
+        )
     return out.astype(VOLTAGE_DTYPE)
 
     # x_padded = np.pad(
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 721012df..cb687eeb 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -400,7 +400,7 @@ def connectivity(self):
 
 class Conv2dSemiFoldedForward(_ConvNdForward):
     def __init__(
-        self, 
+        self,
         in_shape: SizeAnyType,
         out_shape: SizeAnyType,
         kernel: np.ndarray,
@@ -408,18 +408,22 @@ def __init__(
         padding: _SizeAnyType = 0,
         groups: int = 1,
         output_padding: _SizeAnyType = 0,
-        ) -> None:
+    ) -> None:
         self.groups = groups
         super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
 
-
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         return x @ self.connectivity
 
     @property
     def connectivity(self):
         return _conv2d_semifolded_unroll(
-            self.in_shape, self.out_shape, self.weights, self.stride, self.padding, self.groups
+            self.in_shape,
+            self.out_shape,
+            self.weights,
+            self.stride,
+            self.padding,
+            self.groups,
         )
 
 
diff --git a/tests/components/test_functional.py b/tests/components/test_functional.py
index 4e4d2765..6e86a1ab 100644
--- a/tests/components/test_functional.py
+++ b/tests/components/test_functional.py
@@ -834,21 +834,139 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
         "ishape_chw, n_conv, kshape_oihw, stride, padding, out_features, groups",
         [
             # n_conv = 1
-            ((3, 11, 11), 1, [(1, 3, 3, 3)], [1], [1], (10,), [1,]),
-            ((3, 12, 12), 1, [(12, 3, 3, 3)], [(1, 1)], [2], (10,), [1,]), 
-            ((8, 12, 12), 1, [(16, 8, 3, 3)], [(2, 2)], [2], (10,), [1,]),
-            ((8, 12, 12), 1, [(16, 8, 4, 4)], [2], [1], (10,), [1,]),
-            ((4, 12, 12), 1, [(8, 4, 3, 3)], [1], [0], (4, 2), [1,]),
-            ((4, 24, 24), 1, [(8, 4, 3, 3)], [2], [0], 10, [1,]),
-            ((12, 12, 12), 1, [(6, 12, 3, 3)], [1], [0], (3, 3), [1,]),
-            ((4, 24, 24), 1, [(8, 4, 4, 4)], [2], [0], (10,), [1,]),
-            ((8, 32, 32), 1, [(4, 8, 3, 3)], [2], [0], 10, [1,]),
+            (
+                (3, 11, 11),
+                1,
+                [(1, 3, 3, 3)],
+                [1],
+                [1],
+                (10,),
+                [
+                    1,
+                ],
+            ),
+            (
+                (3, 12, 12),
+                1,
+                [(12, 3, 3, 3)],
+                [(1, 1)],
+                [2],
+                (10,),
+                [
+                    1,
+                ],
+            ),
+            (
+                (8, 12, 12),
+                1,
+                [(16, 8, 3, 3)],
+                [(2, 2)],
+                [2],
+                (10,),
+                [
+                    1,
+                ],
+            ),
+            (
+                (8, 12, 12),
+                1,
+                [(16, 8, 4, 4)],
+                [2],
+                [1],
+                (10,),
+                [
+                    1,
+                ],
+            ),
+            (
+                (4, 12, 12),
+                1,
+                [(8, 4, 3, 3)],
+                [1],
+                [0],
+                (4, 2),
+                [
+                    1,
+                ],
+            ),
+            (
+                (4, 24, 24),
+                1,
+                [(8, 4, 3, 3)],
+                [2],
+                [0],
+                10,
+                [
+                    1,
+                ],
+            ),
+            (
+                (12, 12, 12),
+                1,
+                [(6, 12, 3, 3)],
+                [1],
+                [0],
+                (3, 3),
+                [
+                    1,
+                ],
+            ),
+            (
+                (4, 24, 24),
+                1,
+                [(8, 4, 4, 4)],
+                [2],
+                [0],
+                (10,),
+                [
+                    1,
+                ],
+            ),
+            (
+                (8, 32, 32),
+                1,
+                [(4, 8, 3, 3)],
+                [2],
+                [0],
+                10,
+                [
+                    1,
+                ],
+            ),
             # group
-            ((8, 32, 32), 1, [(4, 4, 3, 3)], [2], [0], 10, [2,]),
-            ((8, 32, 32), 1, [(8, 1, 3, 3)], [2], [0], 10, [8,]),
+            (
+                (8, 32, 32),
+                1,
+                [(4, 4, 3, 3)],
+                [2],
+                [0],
+                10,
+                [
+                    2,
+                ],
+            ),
+            (
+                (8, 32, 32),
+                1,
+                [(8, 1, 3, 3)],
+                [2],
+                [0],
+                10,
+                [
+                    8,
+                ],
+            ),
             # n_conv = 2
             ## group
-            ((4, 5, 5), 2, [(8, 1, 3, 3), (8, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10, [4, 8]),
+            (
+                (4, 5, 5),
+                2,
+                [(8, 1, 3, 3), (8, 1, 3, 3)],
+                [(1, 1), (1, 1)],
+                [2, 2],
+                10,
+                [4, 8],
+            ),
             (
                 (4, 32, 32),
                 2,
@@ -856,10 +974,18 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [(2, 2), (2, 2)],
                 [1, 1],
                 10,
-                [2, 2]
+                [2, 2],
             ),
             ##
-            ((1, 5, 5), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [(1, 1), (1, 1)], [2, 2], 10, [1, 1]),
+            (
+                (1, 5, 5),
+                2,
+                [(1, 1, 3, 3), (1, 1, 3, 3)],
+                [(1, 1), (1, 1)],
+                [2, 2],
+                10,
+                [1, 1],
+            ),
             (
                 (4, 32, 32),
                 2,
@@ -867,7 +993,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [(2, 2), (2, 2)],
                 [1, 1],
                 10,
-                [1, 1]
+                [1, 1],
             ),
             (
                 (4, 32, 32),
@@ -876,7 +1002,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [(2, 2), (1, 1)],
                 [1, 2],
                 10,
-                [1, 1]
+                [1, 1],
             ),
             ((1, 32, 32), 2, [(1, 1, 3, 3), (1, 1, 3, 3)], [2, 2], [2, 2], 10, [1, 1]),
             ((1, 32, 32), 2, [(1, 1, 4, 4), (1, 1, 4, 4)], [1, 2], [2, 2], 10, [1, 1]),
@@ -892,7 +1018,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 1, 1],
                 [1, 1, 1],
                 3,
-                [4, 8, 4]
+                [4, 8, 4],
             ),
             ##
             (
@@ -902,7 +1028,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 1, 1],
                 [1, 1, 1],
                 3,
-                [1, 1, 1]
+                [1, 1, 1],
             ),
             (
                 (3, 32, 32),
@@ -911,7 +1037,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 1, 1],
                 [1, 0, 1],
                 10,
-                [1, 1, 1]
+                [1, 1, 1],
             ),
             (
                 (1, 224, 224),
@@ -920,7 +1046,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [2, 2, 2],
                 [3, 2, 1],
                 10,
-                [1, 1, 1]
+                [1, 1, 1],
             ),
             (
                 (3, 32, 32),
@@ -929,7 +1055,7 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 2, 1],
                 [1, 0, 1],
                 10,
-                [1, 1, 1]
+                [1, 1, 1],
             ),
             # n_conv = 5
             (
@@ -939,8 +1065,8 @@ def test_Transpose3d_mapping(self, ensure_dump_dir):
                 [1, 2, 1, 2, 1],
                 [1, 0, 1, 0, 1],
                 10,
-                [1, 1, 1, 1, 1]
-            ),    
+                [1, 1, 1, 1, 1],
+            ),
         ],
     )
     def test_Conv2dSemiFolded_FC_ChainNet(
@@ -958,7 +1084,7 @@ def test_Conv2dSemiFolded_FC_ChainNet(
         from tests.shared_networks import Conv2dSemiFolded_FC_ChainNetN
 
         assert n_conv == len(kshape_oihw) == len(stride)
-        assert ishape_chw[0] == groups[0]*kshape_oihw[0][1]
+        assert ishape_chw[0] == groups[0] * kshape_oihw[0][1]
         kernels = []
         strides = []
         paddings = []
diff --git a/tests/shared_networks.py b/tests/shared_networks.py
index 992f2511..d5ecc26b 100644
--- a/tests/shared_networks.py
+++ b/tests/shared_networks.py
@@ -261,7 +261,9 @@ def __init__(self, shape, kernels, strides, paddings, out_features, weight, grou
         self.i1 = pb.InputProj(input=_out_bypass1, shape_out=shape)
         self.conv_list = NodeList()
 
-        for i, (kernel, stride, padding, g) in enumerate(zip(kernels, strides, paddings, groups)):
+        for i, (kernel, stride, padding, g) in enumerate(
+            zip(kernels, strides, paddings, groups)
+        ):
             self.conv_list.append(
                 pb.Conv2dSemiFolded(
                     self.conv_list[-1] if i > 0 else self.i1,

From 8a05f2775f624b376872419d4f6c9be2429567d3 Mon Sep 17 00:00:00 2001
From: Joustrd <17739386485@163.com>
Date: Tue, 10 Dec 2024 11:40:41 +0800
Subject: [PATCH 175/187] [feat] support snn grouped conv

---
 paibox/components/synapses/base.py           |  12 +-
 paibox/components/synapses/conv_utils.py     | 188 ++++++++++---------
 paibox/components/synapses/synapses.py       |   4 +
 paibox/components/synapses/transforms.py     |  18 +-
 tests/components/synapses/test_synapses.py   |  28 +--
 tests/components/synapses/test_transforms.py | 104 +++++-----
 6 files changed, 201 insertions(+), 153 deletions(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index edea85e7..d8721c3f 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -252,6 +252,7 @@ def __init__(
         stride: tuple[int],
         padding: tuple[int],
         dilation: tuple[int],
+        groups: int,
         order: _KOrder3d,
         name: Optional[str] = None,
     ) -> None:
@@ -268,7 +269,8 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,L
-        out_channels, in_channels, kernel_l = _kernel.shape
+        out_channels, group_in_channels, kernel_l = _kernel.shape
+        in_channels = groups * group_in_channels
         # C,L
         in_ch, in_l = _fm_ndim1_check(source.shape_out, "CL")
         out_l = (in_l + 2 * padding[0] - dilation[0] * (kernel_l - 1) - 1) // stride[
@@ -281,7 +283,7 @@ def __init__(
         if (_output_size := out_channels * out_l) != dest.num_in:
             raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
-        self.comm = Conv1dForward((in_l,), (out_l,), _kernel, stride, padding)
+        self.comm = Conv1dForward((in_l,), (out_l,), _kernel, stride, padding, groups=groups)
 
 
 class Conv2dSyn(FullConnectedSyn):
@@ -295,6 +297,7 @@ def __init__(
         stride: tuple[int, int],
         padding: tuple[int, int],
         dilation: tuple[int, int],
+        groups: int,
         order: _KOrder4d,
         name: Optional[str] = None,
     ) -> None:
@@ -311,7 +314,8 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,H,W
-        out_channels, in_channels, kernel_h, kernel_w = _kernel.shape
+        out_channels, group_in_channels, kernel_h, kernel_w = _kernel.shape
+        in_channels = groups * group_in_channels
         # C,H,W
         in_ch, in_h, in_w = _fm_ndim2_check(source.shape_out, "CHW")
         out_h = (in_h + 2 * padding[0] - dilation[0] * (kernel_h - 1) - 1) // stride[
@@ -331,7 +335,7 @@ def __init__(
             )
 
         self.comm = Conv2dForward(
-            (in_h, in_w), (out_h, out_w), _kernel, stride, padding
+            (in_h, in_w), (out_h, out_w), _kernel, stride, padding, groups=groups
         )
 
 
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 0c72cd0f..7cbd6763 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -67,46 +67,51 @@ def _conv1d_unroll(
     kernel: WeightType,
     stride: Size1Type,
     padding: Size1Type,
+    groups: int,
 ) -> WeightType:
     """Unroll the kernel of 1d convolution into a matrix."""
-    cout, cin, kl = kernel.shape
+    cout, group_cin, kl = kernel.shape
+    group_cout = cout // groups
+    kernel = kernel.reshape(groups, group_cout, group_cin, kl)
     il = in_shape[0] + 2 * padding[0]
     ol = out_shape[0]
 
     # weight unrolled without considering parameter padding : weight unrolled no padding
-    w_unrolled_np = np.zeros((cin * il, cout * ol), dtype=kernel.dtype)
-    zeros_image = np.zeros((cin * il, cout, ol), dtype=kernel.dtype)
-
-    for i in range(ol):
-        zeros_image.fill(0)
-        for ch_idx in np.ndindex(kernel.shape[:2]):
-            # [0] -> o_ch, [1] -> i_ch
-            zeros_image[
-                i * stride[0] + ch_idx[1] * il : i * stride[0] + ch_idx[1] * il + kl,
-                ch_idx[0],
-                i,
-            ] = kernel[ch_idx[0], ch_idx[1], :]
-
-        # if fm_order == "CL":
-        # (cin*il, cout) -> (cout, cin*il)
-        temp = zeros_image[:, :, i].T
-        # else:
-        #     # (cin*il, cout) -> (cout, il, cin)
-        #     temp = zeros_image[:, :, i].reshape(cin, il, cout).transpose()
+    w_unrolled_np = np.zeros((groups, group_cin * il, group_cout * ol), dtype=kernel.dtype)
+    zeros_image = np.zeros((groups, group_cin * il, group_cout, ol), dtype=kernel.dtype)
+    for g in range(groups):
+        for i in range(ol):
+            zeros_image[g].fill(0)
+            for ch_idx in np.ndindex(kernel.shape[1:3]):
+                # [0] -> o_ch, [1] -> i_ch
+                zeros_image[
+                    g,
+                    i * stride[0] + ch_idx[1] * il : i * stride[0] + ch_idx[1] * il + kl,
+                    ch_idx[0],
+                    i,
+                ] = kernel[g, ch_idx[0], ch_idx[1], :]
+
+            # if fm_order == "CL":
+            # (cin*il, cout) -> (cout, cin*il)
+            temp = zeros_image[g, :, :, i].T
+            # else:
+            #     # (cin*il, cout) -> (cout, il, cin)
+            #     temp = zeros_image[:, :, i].reshape(cin, il, cout).transpose()
 
-        for o_ch in range(cout):
-            w_unrolled_np[:, i + o_ch * ol] = temp[o_ch].ravel()
+            for o_ch in range(group_cout):
+                w_unrolled_np[g, :, i + o_ch * ol] = temp[o_ch].ravel()
 
     # Remove the part of the padding in the w_unrolled_no_padding
     # That is, remove useless weight in the w_unrolled_no_padding
     nil = in_shape[0]
-    w_unrolled = np.zeros((cin * nil, cout * ol), dtype=kernel.dtype)
-    for i in range(cin):
-        w_unrolled[i * nil : i * nil + nil, :] = w_unrolled_np[
-            i * il + padding[0] : i * il + il - padding[0], :
-        ]
+    w_unrolled = np.zeros((groups, group_cin * nil, group_cout * ol), dtype=kernel.dtype)
+    for g in range(groups):
+        for i in range(group_cin):
+            w_unrolled[g, i * nil : i * nil + nil, :] = w_unrolled_np[
+                g, i * il + padding[0] : i * il + il - padding[0], :
+            ]
 
-    return w_unrolled
+    return w_unrolled.reshape(group_cin * nil, cout * ol)
 
 
 def _conv2d_unroll(
@@ -115,9 +120,13 @@ def _conv2d_unroll(
     kernel: WeightType,
     stride: Size2Type,
     padding: Size2Type,
+    groups: int,
 ) -> WeightType:
     """Unroll the kernel of 2d convolution into a matrix."""
-    cout, cin, kh, kw = kernel.shape
+    cout, group_cin, kh, kw = kernel.shape
+    cin = group_cin * groups
+    group_cout = cout // groups
+    kernel = kernel.reshape(groups, group_cout, group_cin, kh, kw)
     ih = in_shape[0] + 2 * padding[0]
     iw = in_shape[1] + 2 * padding[1]
     oh, ow = out_shape
@@ -125,62 +134,66 @@ def _conv2d_unroll(
     out_size = oh * ow
 
     # weight unrolled without considering parameter padding
-    w_unrolled_np = np.zeros((cin * in_size, cout * out_size), dtype=kernel.dtype)
-    zeros_image = np.zeros((cin * ih, iw * cout, out_size), dtype=kernel.dtype)
+    w_unrolled_np = np.zeros((groups, group_cin * in_size, group_cout * out_size), dtype=kernel.dtype)
+    zeros_image = np.zeros((groups, group_cin * ih, iw * group_cout, out_size), dtype=kernel.dtype)
 
-    for i in range(oh):
-        for j in range(ow):
-            for ch_idx in np.ndindex(kernel.shape[:2]):
-                # [0] -> o_ch, [1] -> i_ch
-                zeros_image[
-                    i * stride[0]
-                    + ch_idx[1] * ih : i * stride[0]
-                    + ch_idx[1] * ih
-                    + kh,
-                    j * stride[1]
-                    + ch_idx[0] * iw : j * stride[1]
-                    + ch_idx[0] * iw
-                    + kw,
-                    i * ow + j,
-                ] = kernel[ch_idx[0], ch_idx[1], :, :]
-
-            temp = (
-                zeros_image[:, :, i * ow + j]
-                .reshape(cin * ih, cout, iw)
-                .transpose(1, 0, 2)
-            )
-            # else:
-            #     # (cin*ih, cout, iw) -> (cout, cin, ih, iw)
-            #     temp = (
-            #         zeros_image[:, :, i * ow + j]
-            #         .reshape(cin, ih, cout, iw)
-            #         .transpose(2, 1, 3, 0)
-            #     )
-
-            for o_ch in range(cout):
-                w_unrolled_np[:, i * ow + j + o_ch * out_size] = temp[o_ch].ravel()
+    for g in range(groups):
+        for i in range(oh):
+            for j in range(ow):
+                for ch_idx in np.ndindex(kernel.shape[1:3]):
+                    # [0] -> o_ch, [1] -> i_ch
+                    zeros_image[
+                        g,
+                        i * stride[0]
+                        + ch_idx[1] * ih : i * stride[0]
+                        + ch_idx[1] * ih
+                        + kh,
+                        j * stride[1]
+                        + ch_idx[0] * iw : j * stride[1]
+                        + ch_idx[0] * iw
+                        + kw,
+                        i * ow + j,
+                    ] = kernel[g, ch_idx[0], ch_idx[1], :, :]
+
+                temp = (
+                    zeros_image[g, :, :, i * ow + j]
+                    .reshape(group_cin * ih, group_cout, iw)
+                    .transpose(1, 0, 2)
+                )
+                # else:
+                #     # (cin*ih, cout, iw) -> (cout, cin, ih, iw)
+                #     temp = (
+                #         zeros_image[:, :, i * ow + j]
+                #         .reshape(cin, ih, cout, iw)
+                #         .transpose(2, 1, 3, 0)
+                #     )
+
+                for o_ch in range(group_cout):
+                    w_unrolled_np[g, :, i * ow + j + o_ch * out_size] = temp[o_ch].ravel()
 
     # Remove the part of the padding in the w_unrolled_no_padding
     # That is, remove useless weight in the w_unrolled_no_padding
     nih, niw = in_shape
     nin_size = nih * niw
-    w_unrolled = np.zeros((cin * nin_size, cout * out_size), dtype=kernel.dtype)
+    w_unrolled = np.zeros((groups, group_cin * nin_size, group_cout * out_size), dtype=kernel.dtype)
 
-    for i in range(cin):
-        for j in range(nih):
-            w_unrolled[i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
-                w_unrolled_np[
-                    i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1] : i * in_size
-                    + (padding[0] + j) * iw
-                    + padding[1]
-                    + niw,
-                    :,
-                ]
-            )
+    for g in range(groups):
+        for i in range(group_cin):
+            for j in range(nih):
+                w_unrolled[g, i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
+                    w_unrolled_np[
+                        g,
+                        i * in_size
+                        + (padding[0] + j) * iw
+                        + padding[1] : i * in_size
+                        + (padding[0] + j) * iw
+                        + padding[1]
+                        + niw,
+                        :,
+                    ]
+                )
 
-    return w_unrolled
+    return w_unrolled.reshape(group_cin * nin_size, cout * out_size)
 
 
 def _conv2d_semifolded_unroll(
@@ -253,24 +266,29 @@ def _conv1d_faster(
     kernel: WeightType,
     stride: Size1Type,
     padding: Size1Type,
+    groups: int,
 ) -> SynOutType:
     """Faster 1d convolution."""
-    cout, _, kl = kernel.shape  # (O, I, L)
+    cout, group_cin, kl = kernel.shape  # (O, I, L)
+    cin = group_cin * groups
+    local_cout = cout // groups
 
     x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])))
+    x_padded = x_padded.reshape(groups, group_cin, -1)
 
-    # kernel: (cout, cin, kl) -> (cout, cin*kl)
-    col_kernel = kernel.reshape(cout, -1)
+    # kernel: (cout, local_cin, kl) -> (groups, local_cout, local_cin*kl)
+    col_kernel = kernel.reshape(groups, local_cout, -1)
 
-    # padded: (cin, xl+2*p[0]-kl) -> (ol, cin*kl)
-    col_fm = _1d_im2col(x_padded, out_shape[0], kl, stride)
+    # padded: (groups, local_cin, xl+2*p[0]-kl) -> (groups, ol, local_cin*kl)
+    col_fm = [_1d_im2col(x_padded[i], out_shape[0], kl, stride) for i in range(groups)]
 
     # out = np.zeros((cout,) + out_shape, dtype=np.int64)
     # (ol, cin*kl) * (cout, cin*kl)^T = (ol, cout)
-    out = col_fm @ col_kernel.T  # + self.bias
+    out = [col_fm[i] @ col_kernel[i].T for i in range(groups)]  # + self.bias
+    out = [arr.T for arr in out]
+    out_arr = np.concatenate(out, axis=0)
 
-    # (ol, cout) -> (cout, ol)
-    return out.T.astype(VOLTAGE_DTYPE)
+    return out_arr.astype(VOLTAGE_DTYPE)
 
 
 def _conv2d_faster(
diff --git a/paibox/components/synapses/synapses.py b/paibox/components/synapses/synapses.py
index 20e9b63d..66e210d4 100644
--- a/paibox/components/synapses/synapses.py
+++ b/paibox/components/synapses/synapses.py
@@ -78,6 +78,7 @@ def __init__(
         *,
         stride: _Size1Type = 1,
         padding: _Size1Type = 0,
+        groups: int = 1,
         kernel_order: _KOrder3d = "OIL",
         name: Optional[str] = None,
     ) -> None:
@@ -106,6 +107,7 @@ def __init__(
             _single(stride),
             _single(padding),
             _single(1),
+            groups,
             kernel_order,
             name,
         )
@@ -120,6 +122,7 @@ def __init__(
         *,
         stride: _Size2Type = 1,
         padding: _Size2Type = 0,
+        groups: int = 1,
         kernel_order: _KOrder4d = "OIHW",
         name: Optional[str] = None,
     ) -> None:
@@ -148,6 +151,7 @@ def __init__(
             _pair(stride),
             _pair(padding),
             _pair(1),
+            groups,
             kernel_order,
             name,
         )
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index cb687eeb..c93df9fd 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -335,12 +335,14 @@ def __init__(
         stride: _SizeAnyType = 0,
         padding: _SizeAnyType = 0,
         output_padding: _SizeAnyType = 0,
+        groups: int = 1,
     ) -> None:
         self.in_shape = in_shape
         self.out_shape = out_shape
         self.stride = stride
         self.padding = padding
         self.output_padding = output_padding
+        self.groups = groups
 
         super().__init__(kernel)
 
@@ -351,9 +353,10 @@ class Conv1dForward(_ConvNdForward):
     out_shape: Size1Type
     stride: Size1Type
     padding: Size1Type
+    groups: int
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
-        cin = self.weights.shape[1]
+        cin = self.weights.shape[1] * self.groups
 
         # if self.fm_order == "LC":
         #     # (N,) -> (L, C) -> (C, L)
@@ -362,13 +365,13 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         _x = x.reshape((cin,) + self.in_shape)
 
         return _conv1d_faster(
-            _x, self.out_shape, self.weights, self.stride, self.padding
+            _x, self.out_shape, self.weights, self.stride, self.padding, self.groups
         )
 
     @property
     def connectivity(self):
-        return _conv1d_unroll(
-            self.in_shape, self.out_shape, self.weights, self.stride, self.padding
+        return _conv1d_unroll( 
+            self.in_shape, self.out_shape, self.weights, self.stride, self.padding, self.groups
         )
 
 
@@ -377,9 +380,10 @@ class Conv2dForward(_ConvNdForward):
     out_shape: Size2Type
     stride: Size2Type
     padding: Size2Type
+    groups: int
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
-        cin = self.weights.shape[1]
+        cin = self.weights.shape[1] * self.groups
 
         # if self.fm_order == "HWC":
         #     # (N,) -> (H, W, C) -> (C, H, W)
@@ -388,13 +392,13 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         _x = x.reshape((cin,) + self.in_shape)
 
         return _conv2d_faster(
-            _x, self.out_shape, self.weights, self.stride, self.padding
+            _x, self.out_shape, self.weights, self.stride, self.padding, self.groups
         )
 
     @property
     def connectivity(self):
         return _conv2d_unroll(
-            self.in_shape, self.out_shape, self.weights, self.stride, self.padding
+            self.in_shape, self.out_shape, self.weights, self.stride, self.padding, self.groups
         )
 
 
diff --git a/tests/components/synapses/test_synapses.py b/tests/components/synapses/test_synapses.py
index 506963ac..de5741e0 100644
--- a/tests/components/synapses/test_synapses.py
+++ b/tests/components/synapses/test_synapses.py
@@ -273,6 +273,7 @@ def test_Conv1d_instance(self):
         kernel_size = (5,)
         stride = 2
         padding = 1
+        groups = 2
         out_shape = ((32 + 2 - 5) // 2 + 1,)
         in_channels = 8
         out_channels = 16
@@ -282,16 +283,16 @@ def test_Conv1d_instance(self):
         n2 = pb.IF((out_channels,) + out_shape, 3)
 
         weight = np.random.randint(
-            -128, 128, size=(in_channels, out_channels) + kernel_size, dtype=np.int8
+            -128, 128, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int8
         )
         s1 = pb.Conv1d(
-            n1, n2, weight, stride=stride, padding=padding, kernel_order=korder
+            n1, n2, weight, stride=stride, padding=padding, kernel_order=korder, groups=groups
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
         assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
-            in_channels * shape2num(in_shape),
+            in_channels // groups * shape2num(in_shape),
             out_channels * shape2num(out_shape),
         )
 
@@ -300,6 +301,7 @@ def test_Conv2d_instance(self):
         kernel_size = (5, 5)
         padding = (1, 1)
         stride = 2
+        groups = 2
         out_shape = ((32 + 2 - 5) // 2 + 1, (32 + 2 - 5) // 2 + 1)
         in_channels = 8
         out_channels = 16
@@ -310,16 +312,16 @@ def test_Conv2d_instance(self):
         n2 = pb.IF((out_channels * out_shape[0] * out_shape[1],), 3)
 
         weight = np.random.randint(
-            -8, 8, size=(in_channels, out_channels) + kernel_size, dtype=np.int32
+            -8, 8, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int32
         )
         s1 = pb.Conv2d(
-            n1, n2, weight, stride=stride, padding=padding, kernel_order=korder
+            n1, n2, weight, stride=stride, padding=padding, kernel_order=korder, groups=groups
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
         assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
-            in_channels * shape2num(in_shape),
+            in_channels // groups * shape2num(in_shape),
             out_channels * shape2num(out_shape),
         )
 
@@ -328,6 +330,7 @@ def test_Conv1d_inchannel_omitted(self):
         kernel_size = (5,)
         stride = 2
         out_shape = ((32 - 5) // 2 + 1,)
+        groups = 1
         in_channels = 1  # omit it
         out_channels = 4
         korder = "IOL"
@@ -336,14 +339,14 @@ def test_Conv1d_inchannel_omitted(self):
         n2 = pb.IF((out_channels,) + out_shape, 3)
 
         weight = np.random.randint(
-            -128, 128, size=(in_channels, out_channels) + kernel_size, dtype=np.int64
+            -128, 128, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int64
         )
-        s1 = pb.Conv1d(n1, n2, weight, stride=stride, kernel_order=korder)
+        s1 = pb.Conv1d(n1, n2, weight, stride=stride, kernel_order=korder, groups=groups)
 
         assert s1.num_in == in_channels * shape2num(in_shape)
         assert s1.connectivity.dtype == WEIGHT_DTYPE
         assert s1.connectivity.shape == (
-            in_channels * shape2num(in_shape),
+            in_channels // groups * shape2num(in_shape),
             out_channels * shape2num(out_shape),
         )
 
@@ -351,6 +354,7 @@ def test_Conv2d_inchannel_omitted(self):
         in_shape = (32, 32)
         kernel_size = (5, 5)
         stride = 2
+        groups = 1
         out_shape = ((32 - 5) // 2 + 1, (32 - 5) // 2 + 1)
         in_channels = 1  # omit it
         out_channels = 4
@@ -360,13 +364,13 @@ def test_Conv2d_inchannel_omitted(self):
         n2 = pb.IF((out_channels,) + out_shape, 3)
 
         weight = np.random.randint(
-            -128, 128, size=(in_channels, out_channels) + kernel_size, dtype=np.int8
+            -128, 128, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int8
         )
-        s1 = pb.Conv2d(n1, n2, weight, stride=stride, kernel_order=korder)
+        s1 = pb.Conv2d(n1, n2, weight, stride=stride, kernel_order=korder, groups=groups)
 
         assert s1.num_in == in_channels * shape2num(in_shape)
         assert s1.connectivity.shape == (
-            in_channels * shape2num(in_shape),
+            in_channels // groups * shape2num(in_shape),
             out_channels * shape2num(out_shape),
         )
 
diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index 2171cf79..8ad46964 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -240,26 +240,26 @@ def test_MaskedLinear(
         assert f.connectivity.shape == (x.size, y.size)
 
     @pytest.mark.parametrize(
-        "xdtype, in_shape, in_channels, out_channels, kernel_size, stride, padding, kdtype",
+        "xdtype, in_shape, in_channels, out_channels, kernel_size, stride, padding, groups, kdtype",
         [
-            (np.bool_, (8,), 16, 8, (3,), (1,), (1,), np.int8),
-            (np.bool_, (28,), 16, 8, (3,), (1,), (1,), np.bool_),
-            (np.bool_, (28,), 24, 12, (3,), (2,), (2,), np.bool_),
-            (np.bool_, (28,), 24, 12, (5,), (2,), (2,), np.bool_),
-            (np.bool_, (16,), 8, 16, (3,), (2,), (0,), np.bool_),
-            (np.bool_, (28,), 16, 8, (3,), (1,), (0,), np.int8),
-            (np.bool_, (28,), 24, 12, (3,), (2,), (0,), np.int8),
-            (np.bool_, (28,), 24, 12, (5,), (2,), (0,), np.int8),
-            (np.bool_, (16,), 8, 16, (3,), (2,), (0,), np.int8),
-            (np.int8, (8,), 16, 8, (3,), (1,), (1,), np.int8),
-            (np.int8, (28,), 16, 8, (3,), (1,), (1,), np.bool_),
-            (np.int8, (28,), 24, 12, (3,), (2,), (2,), np.bool_),
-            (np.int8, (28,), 24, 12, (5,), (2,), (2,), np.bool_),
-            (np.int8, (16,), 8, 16, (3,), (2,), (0,), np.bool_),
-            (np.int8, (28,), 16, 8, (3,), (1,), (0,), np.int8),
-            (np.int8, (28,), 24, 12, (3,), (2,), (0,), np.int8),
-            (np.int8, (28,), 24, 12, (5,), (2,), (0,), np.int8),
-            (np.int8, (16,), 8, 16, (3,), (2,), (0,), np.int8),
+            (np.bool_, (8,), 16, 8, (3,), (1,), (1,), 2, np.int8),
+            (np.bool_, (28,), 16, 8, (3,), (1,), (1,), 4, np.bool_),
+            (np.bool_, (28,), 24, 12, (3,), (2,), (2,), 3, np.bool_),
+            (np.bool_, (28,), 24, 12, (5,), (2,), (2,), 6, np.bool_),
+            (np.bool_, (16,), 8, 16, (3,), (2,), (0,), 8, np.bool_),
+            (np.bool_, (28,), 16, 8, (3,), (1,), (0,), 1, np.int8),
+            (np.bool_, (28,), 24, 12, (3,), (2,), (0,), 1,np.int8),
+            (np.bool_, (28,), 24, 12, (5,), (2,), (0,), 4, np.int8),
+            (np.bool_, (16,), 8, 16, (3,), (2,), (0,), 2, np.int8),
+            (np.int8, (8,), 16, 8, (3,), (1,), (1,), 2, np.int8),
+            (np.int8, (28,), 16, 8, (3,), (1,), (1,), 4, np.bool_),
+            (np.int8, (28,), 24, 12, (3,), (2,), (2,), 3, np.bool_),
+            (np.int8, (28,), 24, 12, (5,), (2,), (2,), 3, np.bool_),
+            (np.int8, (16,), 8, 16, (3,), (2,), (0,), 8, np.bool_),
+            (np.int8, (28,), 16, 8, (3,), (1,), (0,), 1, np.int8),
+            (np.int8, (28,), 24, 12, (3,), (2,), (0,), 1, np.int8),
+            (np.int8, (28,), 24, 12, (5,), (2,), (0,), 4, np.int8),
+            (np.int8, (16,), 8, 16, (3,), (2,), (0,), 8, np.int8),
             # ((28,), 16, 8, (3,), (1,), (0,), "LC"),
             # ((24,), 8, 8, (3,), (2,), (0,), "LC"),
             # ((24,), 8, 16, (7,), (2,), (0,), "LC"),
@@ -275,17 +275,20 @@ def test_Conv1dForward(
         kernel_size,
         stride,
         padding,
+        groups,
         kdtype,
     ):
+        group_in_channels = in_channels // groups
+        group_out_channels = out_channels // groups
         if kdtype == np.bool_:
             kernel = np.random.randint(
-                0, 2, size=(out_channels, in_channels) + kernel_size, dtype=np.bool_
+                0, 2, size=(out_channels, group_in_channels) + kernel_size, dtype=np.bool_
             )
         else:
             kernel = np.random.randint(
                 np.iinfo(kdtype).min,
                 np.iinfo(kdtype).max + 1,
-                size=(out_channels, in_channels) + kernel_size,
+                size=(out_channels, group_in_channels) + kernel_size,
                 dtype=kdtype,
             )
 
@@ -301,18 +304,21 @@ def test_Conv1dForward(
             )
 
         out_shape = ((in_shape[0] + 2 * padding[0] - kernel_size[0]) // stride[0] + 1,)
+        f = tfm.Conv1dForward(in_shape, out_shape, kernel, stride, padding, groups=groups)
 
-        f = tfm.Conv1dForward(in_shape, out_shape, kernel, stride, padding)
-
-        x = np.random.randint(0, 2, size=fm_shape, dtype=np.bool_)
+        # x = np.random.randint(0, 2, size=fm_shape, dtype=np.bool_)
         xf = x.ravel()
+        xg = xf.reshape(groups, -1)
 
         # The result of __call__ using traditional conv
         y1 = f(xf)
         # The result of matmul using the unrolled matrix
-        y2 = xf @ f.connectivity.astype(np.int32)
+        fkernel = f.connectivity.astype(np.int32)
+        fkernel = fkernel.reshape(groups, group_in_channels * in_shape[0], group_out_channels * out_shape[0])
+        y2 = [xg[i] @ fkernel[i] for i in range(groups)]
+        y2 = np.concatenate(y2, axis=0)
 
-        expected = _conv1d_faster(x, out_shape, kernel, stride, padding)
+        expected = _conv1d_faster(x, out_shape, kernel, stride, padding, groups=groups)
 
         assert np.array_equal(y1, expected)
         assert np.array_equal(y2, expected.ravel())
@@ -322,20 +328,20 @@ def test_Conv1dForward(
         )
 
     @pytest.mark.parametrize(
-        "xdtype, in_shape, in_channels, out_channels, kernel_size, stride, padding, kdtype",
+        "xdtype, in_shape, in_channels, out_channels, kernel_size, stride, padding, groups, kdtype",
         [
-            (np.bool_, (28, 28), 16, 8, (3, 3), (1, 1), (1, 1), np.bool_),
-            (np.bool_, (28, 28), 24, 12, (3, 3), (2, 2), (2, 1), np.bool_),
-            (np.bool_, (28, 28), 16, 8, (3, 3), (1, 1), (2, 3), np.bool_),
-            (np.bool_, (28, 28), 24, 12, (3, 3), (2, 2), (0, 0), np.int8),
-            (np.bool_, (28, 28), 24, 12, (5, 5), (2, 1), (0, 0), np.int8),
-            (np.bool_, (8, 8), 8, 16, (3, 3), (2, 2), (1, 1), np.int8),
-            (np.int8, (28, 28), 16, 8, (3, 3), (1, 1), (1, 1), np.bool_),
-            (np.int8, (28, 28), 24, 12, (3, 3), (2, 2), (2, 1), np.bool_),
-            (np.int8, (28, 28), 16, 8, (3, 3), (1, 1), (2, 3), np.bool_),
-            (np.int8, (28, 28), 24, 12, (3, 3), (2, 2), (0, 0), np.int8),
-            (np.int8, (28, 28), 24, 12, (5, 5), (2, 1), (0, 0), np.int8),
-            (np.int8, (8, 8), 8, 16, (3, 3), (2, 2), (1, 1), np.int8),
+            (np.bool_, (28, 28), 16, 8, (3, 3), (1, 1), (1, 1), 2, np.bool_),
+            (np.bool_, (28, 28), 24, 12, (3, 3), (2, 2), (2, 1), 3, np.bool_),
+            (np.bool_, (28, 28), 16, 8, (3, 3), (1, 1), (2, 3), 8, np.bool_),
+            (np.bool_, (28, 28), 24, 12, (3, 3), (2, 2), (0, 0), 4, np.int8),
+            (np.bool_, (28, 28), 24, 12, (5, 5), (2, 1), (0, 0), 4, np.int8),
+            (np.bool_, (8, 8), 8, 16, (3, 3), (2, 2), (1, 1), 1, np.int8),
+            (np.int8, (28, 28), 16, 8, (3, 3), (1, 1), (1, 1), 8, np.bool_),
+            (np.int8, (28, 28), 24, 12, (3, 3), (2, 2), (2, 1), 1, np.bool_),
+            (np.int8, (28, 28), 16, 8, (3, 3), (1, 1), (2, 3), 4, np.bool_),
+            (np.int8, (28, 28), 24, 12, (3, 3), (2, 2), (0, 0), 12, np.int8),
+            (np.int8, (28, 28), 24, 12, (5, 5), (2, 1), (0, 0), 3, np.int8),
+            (np.int8, (8, 8), 8, 16, (3, 3), (2, 2), (1, 1), 2, np.int8),
             # ((28, 28), 16, 8, (3, 3), (1, 1), (0, 0), "HWC", np.bool_),
             # ((24, 32), 8, 8, (3, 4), (2, 1), (0, 0), "HWC", np.bool_),
             # ((24, 24), 8, 16, (7, 7), (2, 2), (0, 0), "HWC", np.bool_),
@@ -353,17 +359,21 @@ def test_Conv2dForward(
         kernel_size,
         stride,
         padding,
+        groups,
         kdtype,
     ):
+        group_in_channels = in_channels // groups
+        group_out_channels = out_channels // groups
+
         if kdtype == np.bool_:
             kernel = np.random.randint(
-                0, 2, size=(out_channels, in_channels) + kernel_size, dtype=np.bool_
+                0, 2, size=(out_channels, group_in_channels) + kernel_size, dtype=np.bool_
             )
         else:
             kernel = np.random.randint(
                 np.iinfo(kdtype).min,
                 np.iinfo(kdtype).max + 1,
-                size=(out_channels, in_channels) + kernel_size,
+                size=(out_channels, group_in_channels) + kernel_size,
                 dtype=kdtype,
             )
 
@@ -383,16 +393,20 @@ def test_Conv2dForward(
             (in_shape[1] + 2 * padding[1] - kernel_size[1]) // stride[1] + 1,
         )
 
-        f = tfm.Conv2dForward(in_shape, out_shape, kernel, stride, padding)
+        f = tfm.Conv2dForward(in_shape, out_shape, kernel, stride, padding, groups=groups)
 
         xf = x.ravel()
-
+        xg = xf.reshape(groups, -1)
+        
         # The result of __call__ using traditional conv
         y1 = f(xf)
         # The result of matmul using the unrolled matrix
-        y2 = xf @ f.connectivity.astype(np.int32)
+        fkernel = f.connectivity.astype(np.int32)
+        fkernel = fkernel.reshape(groups, group_in_channels * in_shape[0] * in_shape[1], group_out_channels * out_shape[0] * out_shape[1])
+        y2 = [xg[i] @ fkernel[i] for i in range(groups)]
+        y2 = np.concatenate(y2, axis=0)
 
-        expected = _conv2d_faster(x, out_shape, kernel, stride, padding)
+        expected = _conv2d_faster(x, out_shape, kernel, stride, padding, groups=groups)
 
         assert np.array_equal(y1, expected)
         assert np.array_equal(y2, expected.ravel())

From c6206be4f0ad70e85c996b40d6b26fbeea4cd0cd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 10 Dec 2024 03:43:59 +0000
Subject: [PATCH 176/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/synapses/base.py           |  4 +-
 paibox/components/synapses/conv_utils.py     | 53 +++++++++++++-------
 paibox/components/synapses/transforms.py     | 16 ++++--
 tests/components/synapses/test_synapses.py   | 44 +++++++++++++---
 tests/components/synapses/test_transforms.py | 32 +++++++++---
 5 files changed, 110 insertions(+), 39 deletions(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index d8721c3f..1db3ad3f 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -283,7 +283,9 @@ def __init__(
         if (_output_size := out_channels * out_l) != dest.num_in:
             raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
-        self.comm = Conv1dForward((in_l,), (out_l,), _kernel, stride, padding, groups=groups)
+        self.comm = Conv1dForward(
+            (in_l,), (out_l,), _kernel, stride, padding, groups=groups
+        )
 
 
 class Conv2dSyn(FullConnectedSyn):
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 7cbd6763..41f5ba83 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -77,7 +77,9 @@ def _conv1d_unroll(
     ol = out_shape[0]
 
     # weight unrolled without considering parameter padding : weight unrolled no padding
-    w_unrolled_np = np.zeros((groups, group_cin * il, group_cout * ol), dtype=kernel.dtype)
+    w_unrolled_np = np.zeros(
+        (groups, group_cin * il, group_cout * ol), dtype=kernel.dtype
+    )
     zeros_image = np.zeros((groups, group_cin * il, group_cout, ol), dtype=kernel.dtype)
     for g in range(groups):
         for i in range(ol):
@@ -86,7 +88,10 @@ def _conv1d_unroll(
                 # [0] -> o_ch, [1] -> i_ch
                 zeros_image[
                     g,
-                    i * stride[0] + ch_idx[1] * il : i * stride[0] + ch_idx[1] * il + kl,
+                    i * stride[0]
+                    + ch_idx[1] * il : i * stride[0]
+                    + ch_idx[1] * il
+                    + kl,
                     ch_idx[0],
                     i,
                 ] = kernel[g, ch_idx[0], ch_idx[1], :]
@@ -104,7 +109,9 @@ def _conv1d_unroll(
     # Remove the part of the padding in the w_unrolled_no_padding
     # That is, remove useless weight in the w_unrolled_no_padding
     nil = in_shape[0]
-    w_unrolled = np.zeros((groups, group_cin * nil, group_cout * ol), dtype=kernel.dtype)
+    w_unrolled = np.zeros(
+        (groups, group_cin * nil, group_cout * ol), dtype=kernel.dtype
+    )
     for g in range(groups):
         for i in range(group_cin):
             w_unrolled[g, i * nil : i * nil + nil, :] = w_unrolled_np[
@@ -134,8 +141,12 @@ def _conv2d_unroll(
     out_size = oh * ow
 
     # weight unrolled without considering parameter padding
-    w_unrolled_np = np.zeros((groups, group_cin * in_size, group_cout * out_size), dtype=kernel.dtype)
-    zeros_image = np.zeros((groups, group_cin * ih, iw * group_cout, out_size), dtype=kernel.dtype)
+    w_unrolled_np = np.zeros(
+        (groups, group_cin * in_size, group_cout * out_size), dtype=kernel.dtype
+    )
+    zeros_image = np.zeros(
+        (groups, group_cin * ih, iw * group_cout, out_size), dtype=kernel.dtype
+    )
 
     for g in range(groups):
         for i in range(oh):
@@ -169,29 +180,33 @@ def _conv2d_unroll(
                 #     )
 
                 for o_ch in range(group_cout):
-                    w_unrolled_np[g, :, i * ow + j + o_ch * out_size] = temp[o_ch].ravel()
+                    w_unrolled_np[g, :, i * ow + j + o_ch * out_size] = temp[
+                        o_ch
+                    ].ravel()
 
     # Remove the part of the padding in the w_unrolled_no_padding
     # That is, remove useless weight in the w_unrolled_no_padding
     nih, niw = in_shape
     nin_size = nih * niw
-    w_unrolled = np.zeros((groups, group_cin * nin_size, group_cout * out_size), dtype=kernel.dtype)
+    w_unrolled = np.zeros(
+        (groups, group_cin * nin_size, group_cout * out_size), dtype=kernel.dtype
+    )
 
     for g in range(groups):
         for i in range(group_cin):
             for j in range(nih):
-                w_unrolled[g, i * nin_size + j * niw : i * nin_size + j * niw + niw, :] = (
-                    w_unrolled_np[
-                        g,
-                        i * in_size
-                        + (padding[0] + j) * iw
-                        + padding[1] : i * in_size
-                        + (padding[0] + j) * iw
-                        + padding[1]
-                        + niw,
-                        :,
-                    ]
-                )
+                w_unrolled[
+                    g, i * nin_size + j * niw : i * nin_size + j * niw + niw, :
+                ] = w_unrolled_np[
+                    g,
+                    i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1] : i * in_size
+                    + (padding[0] + j) * iw
+                    + padding[1]
+                    + niw,
+                    :,
+                ]
 
     return w_unrolled.reshape(group_cin * nin_size, cout * out_size)
 
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index c93df9fd..9f0b483b 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -370,8 +370,13 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
 
     @property
     def connectivity(self):
-        return _conv1d_unroll( 
-            self.in_shape, self.out_shape, self.weights, self.stride, self.padding, self.groups
+        return _conv1d_unroll(
+            self.in_shape,
+            self.out_shape,
+            self.weights,
+            self.stride,
+            self.padding,
+            self.groups,
         )
 
 
@@ -398,7 +403,12 @@ def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
     @property
     def connectivity(self):
         return _conv2d_unroll(
-            self.in_shape, self.out_shape, self.weights, self.stride, self.padding, self.groups
+            self.in_shape,
+            self.out_shape,
+            self.weights,
+            self.stride,
+            self.padding,
+            self.groups,
         )
 
 
diff --git a/tests/components/synapses/test_synapses.py b/tests/components/synapses/test_synapses.py
index de5741e0..24776fa2 100644
--- a/tests/components/synapses/test_synapses.py
+++ b/tests/components/synapses/test_synapses.py
@@ -283,10 +283,19 @@ def test_Conv1d_instance(self):
         n2 = pb.IF((out_channels,) + out_shape, 3)
 
         weight = np.random.randint(
-            -128, 128, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int8
+            -128,
+            128,
+            size=(in_channels // groups, out_channels) + kernel_size,
+            dtype=np.int8,
         )
         s1 = pb.Conv1d(
-            n1, n2, weight, stride=stride, padding=padding, kernel_order=korder, groups=groups
+            n1,
+            n2,
+            weight,
+            stride=stride,
+            padding=padding,
+            kernel_order=korder,
+            groups=groups,
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
@@ -312,10 +321,19 @@ def test_Conv2d_instance(self):
         n2 = pb.IF((out_channels * out_shape[0] * out_shape[1],), 3)
 
         weight = np.random.randint(
-            -8, 8, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int32
+            -8,
+            8,
+            size=(in_channels // groups, out_channels) + kernel_size,
+            dtype=np.int32,
         )
         s1 = pb.Conv2d(
-            n1, n2, weight, stride=stride, padding=padding, kernel_order=korder, groups=groups
+            n1,
+            n2,
+            weight,
+            stride=stride,
+            padding=padding,
+            kernel_order=korder,
+            groups=groups,
         )
 
         assert s1.num_in == in_channels * shape2num(in_shape)
@@ -339,9 +357,14 @@ def test_Conv1d_inchannel_omitted(self):
         n2 = pb.IF((out_channels,) + out_shape, 3)
 
         weight = np.random.randint(
-            -128, 128, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int64
+            -128,
+            128,
+            size=(in_channels // groups, out_channels) + kernel_size,
+            dtype=np.int64,
+        )
+        s1 = pb.Conv1d(
+            n1, n2, weight, stride=stride, kernel_order=korder, groups=groups
         )
-        s1 = pb.Conv1d(n1, n2, weight, stride=stride, kernel_order=korder, groups=groups)
 
         assert s1.num_in == in_channels * shape2num(in_shape)
         assert s1.connectivity.dtype == WEIGHT_DTYPE
@@ -364,9 +387,14 @@ def test_Conv2d_inchannel_omitted(self):
         n2 = pb.IF((out_channels,) + out_shape, 3)
 
         weight = np.random.randint(
-            -128, 128, size=(in_channels // groups, out_channels) + kernel_size, dtype=np.int8
+            -128,
+            128,
+            size=(in_channels // groups, out_channels) + kernel_size,
+            dtype=np.int8,
+        )
+        s1 = pb.Conv2d(
+            n1, n2, weight, stride=stride, kernel_order=korder, groups=groups
         )
-        s1 = pb.Conv2d(n1, n2, weight, stride=stride, kernel_order=korder, groups=groups)
 
         assert s1.num_in == in_channels * shape2num(in_shape)
         assert s1.connectivity.shape == (
diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index 8ad46964..b5a3f937 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -248,7 +248,7 @@ def test_MaskedLinear(
             (np.bool_, (28,), 24, 12, (5,), (2,), (2,), 6, np.bool_),
             (np.bool_, (16,), 8, 16, (3,), (2,), (0,), 8, np.bool_),
             (np.bool_, (28,), 16, 8, (3,), (1,), (0,), 1, np.int8),
-            (np.bool_, (28,), 24, 12, (3,), (2,), (0,), 1,np.int8),
+            (np.bool_, (28,), 24, 12, (3,), (2,), (0,), 1, np.int8),
             (np.bool_, (28,), 24, 12, (5,), (2,), (0,), 4, np.int8),
             (np.bool_, (16,), 8, 16, (3,), (2,), (0,), 2, np.int8),
             (np.int8, (8,), 16, 8, (3,), (1,), (1,), 2, np.int8),
@@ -282,7 +282,10 @@ def test_Conv1dForward(
         group_out_channels = out_channels // groups
         if kdtype == np.bool_:
             kernel = np.random.randint(
-                0, 2, size=(out_channels, group_in_channels) + kernel_size, dtype=np.bool_
+                0,
+                2,
+                size=(out_channels, group_in_channels) + kernel_size,
+                dtype=np.bool_,
             )
         else:
             kernel = np.random.randint(
@@ -304,7 +307,9 @@ def test_Conv1dForward(
             )
 
         out_shape = ((in_shape[0] + 2 * padding[0] - kernel_size[0]) // stride[0] + 1,)
-        f = tfm.Conv1dForward(in_shape, out_shape, kernel, stride, padding, groups=groups)
+        f = tfm.Conv1dForward(
+            in_shape, out_shape, kernel, stride, padding, groups=groups
+        )
 
         # x = np.random.randint(0, 2, size=fm_shape, dtype=np.bool_)
         xf = x.ravel()
@@ -314,7 +319,9 @@ def test_Conv1dForward(
         y1 = f(xf)
         # The result of matmul using the unrolled matrix
         fkernel = f.connectivity.astype(np.int32)
-        fkernel = fkernel.reshape(groups, group_in_channels * in_shape[0], group_out_channels * out_shape[0])
+        fkernel = fkernel.reshape(
+            groups, group_in_channels * in_shape[0], group_out_channels * out_shape[0]
+        )
         y2 = [xg[i] @ fkernel[i] for i in range(groups)]
         y2 = np.concatenate(y2, axis=0)
 
@@ -367,7 +374,10 @@ def test_Conv2dForward(
 
         if kdtype == np.bool_:
             kernel = np.random.randint(
-                0, 2, size=(out_channels, group_in_channels) + kernel_size, dtype=np.bool_
+                0,
+                2,
+                size=(out_channels, group_in_channels) + kernel_size,
+                dtype=np.bool_,
             )
         else:
             kernel = np.random.randint(
@@ -393,16 +403,22 @@ def test_Conv2dForward(
             (in_shape[1] + 2 * padding[1] - kernel_size[1]) // stride[1] + 1,
         )
 
-        f = tfm.Conv2dForward(in_shape, out_shape, kernel, stride, padding, groups=groups)
+        f = tfm.Conv2dForward(
+            in_shape, out_shape, kernel, stride, padding, groups=groups
+        )
 
         xf = x.ravel()
         xg = xf.reshape(groups, -1)
-        
+
         # The result of __call__ using traditional conv
         y1 = f(xf)
         # The result of matmul using the unrolled matrix
         fkernel = f.connectivity.astype(np.int32)
-        fkernel = fkernel.reshape(groups, group_in_channels * in_shape[0] * in_shape[1], group_out_channels * out_shape[0] * out_shape[1])
+        fkernel = fkernel.reshape(
+            groups,
+            group_in_channels * in_shape[0] * in_shape[1],
+            group_out_channels * out_shape[0] * out_shape[1],
+        )
         y2 = [xg[i] @ fkernel[i] for i in range(groups)]
         y2 = np.concatenate(y2, axis=0)
 

From 27550ced4da934e2eaa092a4d1fe628ea28d3358 Mon Sep 17 00:00:00 2001
From: Joustrd <17739386485@163.com>
Date: Tue, 10 Dec 2024 13:53:11 +0800
Subject: [PATCH 177/187] [fix] conv init bugs

---
 paibox/components/synapses/transforms.py | 36 ++++++++++++++++--------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 9f0b483b..62db5036 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -335,25 +335,30 @@ def __init__(
         stride: _SizeAnyType = 0,
         padding: _SizeAnyType = 0,
         output_padding: _SizeAnyType = 0,
-        groups: int = 1,
     ) -> None:
         self.in_shape = in_shape
         self.out_shape = out_shape
         self.stride = stride
         self.padding = padding
         self.output_padding = output_padding
-        self.groups = groups
 
         super().__init__(kernel)
 
 
 class Conv1dForward(_ConvNdForward):
 
-    in_shape: Size1Type
-    out_shape: Size1Type
-    stride: Size1Type
-    padding: Size1Type
-    groups: int
+    def __init__(
+        self,
+        in_shape: SizeAnyType,
+        out_shape: SizeAnyType,
+        kernel: np.ndarray,
+        stride: _SizeAnyType = 0,
+        padding: _SizeAnyType = 0,
+        groups: int = 1,
+        output_padding: _SizeAnyType = 0,
+    ) -> None:
+        self.groups = groups
+        super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1] * self.groups
@@ -381,11 +386,18 @@ def connectivity(self):
 
 
 class Conv2dForward(_ConvNdForward):
-    in_shape: Size2Type
-    out_shape: Size2Type
-    stride: Size2Type
-    padding: Size2Type
-    groups: int
+    def __init__(
+        self,
+        in_shape: SizeAnyType,
+        out_shape: SizeAnyType,
+        kernel: np.ndarray,
+        stride: _SizeAnyType = 0,
+        padding: _SizeAnyType = 0,
+        groups: int = 1,
+        output_padding: _SizeAnyType = 0,
+    ) -> None:
+        self.groups = groups
+        super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1] * self.groups

From e37ffda3ac6868c2f8fbeb1f445b6ce509c2eb10 Mon Sep 17 00:00:00 2001
From: Joustrd <17739386485@163.com>
Date: Tue, 10 Dec 2024 23:46:15 +0800
Subject: [PATCH 178/187] [fix] Unify groups parameter initialization and
 variable names

---
 paibox/components/synapses/base.py       |  7 +--
 paibox/components/synapses/conv_utils.py | 35 ++++-----------
 paibox/components/synapses/transforms.py | 56 +++++++++---------------
 3 files changed, 32 insertions(+), 66 deletions(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 1db3ad3f..374a0dcb 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -368,13 +368,14 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,H
-        out_channels, in_channels, kernel_h = _kernel.shape
+        out_channels, group_in_channels, kernel_h = _kernel.shape
+        in_channels = groups * group_in_channels
         # I,H
         assert len(source.shape_out) == 2
         in_ch, in_h = source.shape_out
         out_h = (in_h + 2 * padding[0] - kernel_h) // stride[0] + 1
 
-        if in_ch != groups * in_channels:
+        if in_ch != in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
         if (_output_size := out_channels * out_h) != dest.num_in:
@@ -384,7 +385,7 @@ def __init__(
             )
 
         self.comm = Conv2dSemiFoldedForward(
-            (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding, groups
+            (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding, groups=groups
         )
 
 
diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 41f5ba83..d3048695 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -316,13 +316,12 @@ def _conv2d_faster(
     # fm_order: str,
 ) -> SynOutType:
     """Faster 2d convolution."""
-    cout, cin, kh, kw = kernel.shape  # (O, I, H, W)
+    cout, group_cin, kh, kw = kernel.shape  # (O, I, H, W)
     if cout % groups != 0:
         raise ValueError("Output channels must be divisible by groups.")
 
-        # 计算每个组的通道数
-    cin_per_group = cin
-    cout_per_group = cout // groups
+    # 计算每个组的通道数
+    group_cout = cout // groups
 
     # 将输入张量进行填充
     x_padded = np.pad(
@@ -335,11 +334,11 @@ def _conv2d_faster(
 
     for g in range(groups):
         # 获取当前组的输入和卷积核
-        x_group = x_padded[g * cin_per_group : (g + 1) * cin_per_group, :, :]
-        kernel_group = kernel[g * cout_per_group : (g + 1) * cout_per_group, :, :, :]
+        x_group = x_padded[g * group_cin : (g + 1) * group_cin, :, :]
+        kernel_group = kernel[g * group_cout : (g + 1) * group_cout, :, :, :]
 
         # 重塑卷积核以进行矩阵乘法
-        col_kernel = kernel_group.reshape(cout_per_group, -1)
+        col_kernel = kernel_group.reshape(group_cout, -1)
 
         # 转换当前组的填充图像为列格式
         col_fm = _2d_im2col(x_group, out_shape[0], out_shape[1], kh, kw, stride)
@@ -348,29 +347,11 @@ def _conv2d_faster(
         out_group = col_fm @ col_kernel.T
 
         # 将组输出重塑并合并到最终输出中
-        out[g * cout_per_group : (g + 1) * cout_per_group, :] = out_group.T.reshape(
-            (cout_per_group, *out_shape)
+        out[g * group_cout : (g + 1) * group_cout, :] = out_group.T.reshape(
+            (group_cout, *out_shape)
         )
     return out.astype(VOLTAGE_DTYPE)
 
-    # x_padded = np.pad(
-    #     x_chw,
-    #     ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
-    # )
-
-    # # kernel: (cout, cin, kh, kw) -> (cout, cin*kh*kw)
-    # col_kernel = kernel.reshape(cout, -1)
-
-    # # padded: (cin, xh+2*p[0]-kh, xw+2*p[1]-kw) -> (oh*ow, cin*kh*kw)
-    # col_fm = _2d_im2col(x_padded, out_shape[0], out_shape[1], kh, kw, stride)
-    # # out = np.zeros((cout,) + out_shape, dtype=np.int64)
-    # # (oh*ow, cin*kh*kw) * (cout, cin*kh*kw)^T = (oh*ow, cout)
-    # out = col_fm @ col_kernel.T  # + self.bias
-    # # (oh*ow, cout) -> (cout, oh*ow) -> (cout, oh, ow)
-    # out = out.T.reshape((cout,) + out_shape)
-
-    # return out.astype(VOLTAGE_DTYPE)
-
 
 def _convtranspose1d_unroll(
     in_shape: Size1Type,
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 62db5036..1a296fa0 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -335,30 +335,26 @@ def __init__(
         stride: _SizeAnyType = 0,
         padding: _SizeAnyType = 0,
         output_padding: _SizeAnyType = 0,
+        groups: int = 1,
     ) -> None:
         self.in_shape = in_shape
         self.out_shape = out_shape
         self.stride = stride
         self.padding = padding
         self.output_padding = output_padding
+        self.groups = groups
 
         super().__init__(kernel)
 
 
 class Conv1dForward(_ConvNdForward):
 
-    def __init__(
-        self,
-        in_shape: SizeAnyType,
-        out_shape: SizeAnyType,
-        kernel: np.ndarray,
-        stride: _SizeAnyType = 0,
-        padding: _SizeAnyType = 0,
-        groups: int = 1,
-        output_padding: _SizeAnyType = 0,
-    ) -> None:
-        self.groups = groups
-        super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
+    in_shape: Size1Type
+    out_shape: Size1Type
+    stride: Size1Type
+    padding: Size1Type
+    groups: int
+
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1] * self.groups
@@ -386,18 +382,12 @@ def connectivity(self):
 
 
 class Conv2dForward(_ConvNdForward):
-    def __init__(
-        self,
-        in_shape: SizeAnyType,
-        out_shape: SizeAnyType,
-        kernel: np.ndarray,
-        stride: _SizeAnyType = 0,
-        padding: _SizeAnyType = 0,
-        groups: int = 1,
-        output_padding: _SizeAnyType = 0,
-    ) -> None:
-        self.groups = groups
-        super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
+
+    in_shape: Size2Type
+    out_shape: Size2Type
+    stride: Size2Type
+    padding: Size2Type
+    groups: int
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1] * self.groups
@@ -425,18 +415,12 @@ def connectivity(self):
 
 
 class Conv2dSemiFoldedForward(_ConvNdForward):
-    def __init__(
-        self,
-        in_shape: SizeAnyType,
-        out_shape: SizeAnyType,
-        kernel: np.ndarray,
-        stride: _SizeAnyType = 0,
-        padding: _SizeAnyType = 0,
-        groups: int = 1,
-        output_padding: _SizeAnyType = 0,
-    ) -> None:
-        self.groups = groups
-        super().__init__(in_shape, out_shape, kernel, stride, padding, output_padding)
+
+    in_shape: Size2Type
+    out_shape: Size2Type
+    stride: Size2Type
+    padding: Size2Type
+    groups: int
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         return x @ self.connectivity

From e7e38c5a7119b95e05fb05970c690389d0c045dc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 10 Dec 2024 15:46:59 +0000
Subject: [PATCH 179/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/synapses/base.py       | 7 ++++++-
 paibox/components/synapses/transforms.py | 1 -
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 374a0dcb..552cc2f3 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -385,7 +385,12 @@ def __init__(
             )
 
         self.comm = Conv2dSemiFoldedForward(
-            (in_ch, in_h), (out_channels, out_h), _kernel, stride, padding, groups=groups
+            (in_ch, in_h),
+            (out_channels, out_h),
+            _kernel,
+            stride,
+            padding,
+            groups=groups,
         )
 
 
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index 1a296fa0..da334afd 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -355,7 +355,6 @@ class Conv1dForward(_ConvNdForward):
     padding: Size1Type
     groups: int
 
-
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
         cin = self.weights.shape[1] * self.groups
 

From 08217627302460dfb6a86de0ff20adb3fa284516 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 9 Dec 2024 09:50:31 +0800
Subject: [PATCH 180/187] =?UTF-8?q?=E2=A4=B5=EF=B8=8F=20revert=20pr#146=20?=
 =?UTF-8?q?about=20twe?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/components/functional.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paibox/components/functional.py b/paibox/components/functional.py
index a6e33f94..d32d095a 100644
--- a/paibox/components/functional.py
+++ b/paibox/components/functional.py
@@ -942,7 +942,7 @@ def build(
                 shape=(ich, ih),
                 delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1088,7 +1088,7 @@ def build(
                 (ic, ih),
                 delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe,
                 name=f"n{i}_delay_{self.name}",
             )
             n_delays.append(neuron)
@@ -1259,7 +1259,7 @@ def build(
                 (cin, ih),
                 delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )
@@ -1406,7 +1406,7 @@ def build(
                 (cin, ih),
                 delay=incoming_flow_format.interval * i + 1,
                 tick_wait_start=self.tick_wait_start,
-                tick_wait_end=twe - incoming_flow_format.interval * i,
+                tick_wait_end=twe,
                 keep_shape=self.keep_shape,
                 name=f"n{i}_{self.name}",
             )

From abea1a70a62abaa768630dd0c8cbee8ba9ee75f0 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 16 Dec 2024 15:08:09 +0800
Subject: [PATCH 181/187] format(conv): removed unused comments, improved
 format

---
 paibox/components/synapses/conv_utils.py | 128 ++++++++++++-----------
 1 file changed, 69 insertions(+), 59 deletions(-)

diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index d3048695..216dd052 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -1,6 +1,7 @@
 from collections.abc import Iterable
 from functools import partial
 from itertools import repeat
+from typing import Optional
 
 import numpy as np
 from numpy.typing import NDArray
@@ -224,14 +225,16 @@ def _conv2d_semifolded_unroll(
     ih = in_shape[1] + 2 * padding[0]
     _, oh = out_shape
     w_np = np.zeros((cin * in_shape[1], cout * oh), dtype=kernel.dtype)
+
+    cout_per_grp = cout // groups
     for g in range(groups):
-        for i in range(cout // groups):
+        for i in range(cout_per_grp):
             for j in range(ck):
                 # Must recreate `w_block` every time because some rows will be deleted.
                 w_block = np.zeros((ih, oh), dtype=kernel.dtype)
                 for k in range(oh):
                     w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[
-                        g * cout // groups + i, j, :
+                        g * cout_per_grp + i, j, :
                     ]
                 if padding[0] > 0:  # H direction
                     w_block = np.delete(
@@ -245,28 +248,11 @@ def _conv2d_semifolded_unroll(
                     g * ck * in_shape[1]
                     + j * in_shape[1] : g * ck * in_shape[1]
                     + (j + 1) * in_shape[1],
-                    g * oh * cout // groups
-                    + i * oh : g * oh * cout // groups
+                    g * oh * cout_per_grp
+                    + i * oh : g * oh * cout_per_grp
                     + (i + 1) * oh,
                 ] = w_block
 
-    # for i in range(cout):
-    #     for j in range(cin):
-    #         # Must recreate `w_block` every time because some rows will be deleted.
-    #         w_block = np.zeros((ih, oh), dtype=kernel.dtype)
-    #         for k in range(oh):
-    #             w_block[k * stride[1] : k * stride[1] + kh, k] = kernel[i, j, :]
-
-    #         if padding[0] > 0:  # H direction
-    #             w_block = np.delete(
-    #                 w_block,
-    #                 np.hstack((np.arange(padding[0]), np.arange(ih - padding[0], ih))),
-    #                 axis=0,
-    #             )
-    #         w_np[j * in_shape[1] : (j + 1) * in_shape[1], i * oh : (i + 1) * oh] = (
-    #             w_block
-    #         )
-
     return w_np
 
 
@@ -281,29 +267,40 @@ def _conv1d_faster(
     kernel: WeightType,
     stride: Size1Type,
     padding: Size1Type,
-    groups: int,
+    groups: int = 1,
+    bias: Optional[WeightType] = None,
 ) -> SynOutType:
     """Faster 1d convolution."""
-    cout, group_cin, kl = kernel.shape  # (O, I, L)
-    cin = group_cin * groups
-    local_cout = cout // groups
+    cout, cin_per_grp, kl = kernel.shape  # (O, I, L)
+    assert x_cl.shape[0] == cin_per_grp * groups
+    assert cout % groups == 0
+
+    cout_per_grp = cout // groups
 
     x_padded = np.pad(x_cl, ((0, 0), (padding[0], padding[0])))
-    x_padded = x_padded.reshape(groups, group_cin, -1)
+    out = np.zeros((cout, *out_shape), dtype=np.int64)
 
-    # kernel: (cout, local_cin, kl) -> (groups, local_cout, local_cin*kl)
-    col_kernel = kernel.reshape(groups, local_cout, -1)
+    for g in range(groups):
+        x_grp = x_padded[g * cin_per_grp : (g + 1) * cin_per_grp, :]
+        kernel_grp = kernel[g * cout_per_grp : (g + 1) * cout_per_grp, :, :]
+        # kernel: (cout_per_grp, cin, kl) -> (cout_per_grp, cin*kl)
+        col_kernel = kernel_grp.reshape(cout_per_grp, -1)
+        # padded: (cin, xl+2*p[0]-kl) -> (ol, cin*kl)
+        col_fm = _1d_im2col(x_grp, out_shape[0], kl, stride)
+        # (ol, cin*kl) * (cout, cin*kl)^T = (ol, cout_per_grp)
+        out_grp = col_fm @ col_kernel.T
+
+        out[g * cout_per_grp : (g + 1) * cout_per_grp, :] = out_grp.T.reshape(
+            (cout_per_grp, *out_shape)
+        )
 
-    # padded: (groups, local_cin, xl+2*p[0]-kl) -> (groups, ol, local_cin*kl)
-    col_fm = [_1d_im2col(x_padded[i], out_shape[0], kl, stride) for i in range(groups)]
+    if bias:
+        _bias = bias.squeeze()
+        assert _bias.shape == (cout,)
 
-    # out = np.zeros((cout,) + out_shape, dtype=np.int64)
-    # (ol, cin*kl) * (cout, cin*kl)^T = (ol, cout)
-    out = [col_fm[i] @ col_kernel[i].T for i in range(groups)]  # + self.bias
-    out = [arr.T for arr in out]
-    out_arr = np.concatenate(out, axis=0)
+        out += _bias
 
-    return out_arr.astype(VOLTAGE_DTYPE)
+    return out.astype(VOLTAGE_DTYPE)
 
 
 def _conv2d_faster(
@@ -313,43 +310,42 @@ def _conv2d_faster(
     stride: Size2Type,
     padding: Size2Type,
     groups: int = 1,
-    # fm_order: str,
+    bias: Optional[WeightType] = None,
 ) -> SynOutType:
     """Faster 2d convolution."""
-    cout, group_cin, kh, kw = kernel.shape  # (O, I, H, W)
-    if cout % groups != 0:
-        raise ValueError("Output channels must be divisible by groups.")
+    cout, cin_per_grp, kh, kw = kernel.shape  # (O, I, H, W)
 
-    # 计算每个组的通道数
-    group_cout = cout // groups
+    assert x_chw.shape[0] == cin_per_grp * groups
+    assert cout % groups == 0
+
+    cout_per_grp = cout // groups
 
-    # 将输入张量进行填充
     x_padded = np.pad(
         x_chw,
         ((0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
     )
-
-    # 用于存储最终输出
     out = np.zeros((cout, *out_shape), dtype=np.int64)
 
     for g in range(groups):
-        # 获取当前组的输入和卷积核
-        x_group = x_padded[g * group_cin : (g + 1) * group_cin, :, :]
-        kernel_group = kernel[g * group_cout : (g + 1) * group_cout, :, :, :]
-
-        # 重塑卷积核以进行矩阵乘法
-        col_kernel = kernel_group.reshape(group_cout, -1)
+        x_grp = x_padded[g * cin_per_grp : (g + 1) * cin_per_grp, :, :]
+        kernel_grp = kernel[g * cout_per_grp : (g + 1) * cout_per_grp, :, :, :]
+        # kernel: (cout_per_grp, cin, kh, kw) -> (cout_per_grp, cin*kh*kw)
+        col_kernel = kernel_grp.reshape(cout_per_grp, -1)
+        # padded: (cin, xh+2*p[0]-kh, xw+2*p[1]-kw) -> (oh*ow, cin*kh*kw)
+        col_fm = _2d_im2col(x_grp, out_shape[0], out_shape[1], kh, kw, stride)
+        # (oh*ow, cin*kh*kw) * (cout, cin*kh*kw)^T = (oh*ow, cout_per_grp)
+        out_grp = col_fm @ col_kernel.T
+
+        out[g * cout_per_grp : (g + 1) * cout_per_grp, :] = out_grp.T.reshape(
+            (cout_per_grp, *out_shape)
+        )
 
-        # 转换当前组的填充图像为列格式
-        col_fm = _2d_im2col(x_group, out_shape[0], out_shape[1], kh, kw, stride)
+    if bias:
+        _bias = bias.squeeze()
+        assert _bias.shape == (cout,)
 
-        # 进行矩阵乘法
-        out_group = col_fm @ col_kernel.T
+        out += _bias
 
-        # 将组输出重塑并合并到最终输出中
-        out[g * group_cout : (g + 1) * group_cout, :] = out_group.T.reshape(
-            (group_cout, *out_shape)
-        )
     return out.astype(VOLTAGE_DTYPE)
 
 
@@ -524,6 +520,7 @@ def _convtranspose1d_faster(
     stride: Size1Type,
     padding: Size1Type,
     output_padding: Size1Type,
+    bias: Optional[WeightType] = None,
 ) -> SynOutType:
     # (C, L)
     xc, xl = x_cl.shape
@@ -567,6 +564,12 @@ def _convtranspose1d_faster(
     # output_padding
     out = np.pad(out, ((0, 0), (0, output_padding[0])))
 
+    if bias:
+        _bias = bias.squeeze()
+        assert _bias.shape == (cout,)
+
+        out += _bias
+
     return out.astype(VOLTAGE_DTYPE)
 
 
@@ -577,6 +580,7 @@ def _convtranspose2d_faster(
     stride: Size2Type,
     padding: Size2Type,
     output_padding: Size2Type,
+    bias: Optional[WeightType] = None,
 ) -> SynOutType:
     # (C, H, W)
     xc, xh, xw = x_chw.shape
@@ -626,6 +630,12 @@ def _convtranspose2d_faster(
     ]
     # output_padding
     out = np.pad(out, ((0, 0), (0, output_padding[0]), (0, output_padding[1])))
+    
+    if bias:
+        _bias = bias.squeeze()
+        assert _bias.shape == (cout,)
+
+        out += _bias
 
     return out
 

From 03b02aa2b8baf32d5ad902b01ce75a1607216fd6 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 16 Dec 2024 15:09:28 +0800
Subject: [PATCH 182/187] format(synapses): improved the prompt text for input
 channel number mismatch errors

---
 paibox/components/synapses/base.py           | 61 ++++++++++----------
 paibox/components/synapses/transforms.py     | 11 ++--
 tests/components/synapses/test_transforms.py |  4 +-
 3 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/paibox/components/synapses/base.py b/paibox/components/synapses/base.py
index 552cc2f3..10196b41 100644
--- a/paibox/components/synapses/base.py
+++ b/paibox/components/synapses/base.py
@@ -269,18 +269,19 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,L
-        out_channels, group_in_channels, kernel_l = _kernel.shape
-        in_channels = groups * group_in_channels
+        o_ch, grp_in_ch, kernel_l = _kernel.shape
         # C,L
         in_ch, in_l = _fm_ndim1_check(source.shape_out, "CL")
         out_l = (in_l + 2 * padding[0] - dilation[0] * (kernel_l - 1) - 1) // stride[
             0
         ] + 1
 
-        if in_ch != in_channels:
-            raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
+        if in_ch != (_cur_in_ch := groups * grp_in_ch):
+            in_ch_mismatch_text = f"input channels mismatch: {in_ch} != {_cur_in_ch}"
+            in_ch_mismatch_text += f" ({groups}*{grp_in_ch})." if groups > 1 else "."
+            raise ShapeError(in_ch_mismatch_text)
 
-        if (_output_size := out_channels * out_l) != dest.num_in:
+        if (_output_size := o_ch * out_l) != dest.num_in:
             raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
         self.comm = Conv1dForward(
@@ -316,8 +317,7 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,H,W
-        out_channels, group_in_channels, kernel_h, kernel_w = _kernel.shape
-        in_channels = groups * group_in_channels
+        o_ch, grp_in_ch, kernel_h, kernel_w = _kernel.shape
         # C,H,W
         in_ch, in_h, in_w = _fm_ndim2_check(source.shape_out, "CHW")
         out_h = (in_h + 2 * padding[0] - dilation[0] * (kernel_h - 1) - 1) // stride[
@@ -327,12 +327,14 @@ def __init__(
             1
         ] + 1
 
-        if in_ch != in_channels:
-            raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
+        if in_ch != (_cur_in_ch := groups * grp_in_ch):
+            in_ch_mismatch_text = f"input channels mismatch: {in_ch} != {_cur_in_ch}"
+            in_ch_mismatch_text += f" ({groups}*{grp_in_ch})." if groups > 1 else "."
+            raise ShapeError(in_ch_mismatch_text)
 
-        if (_output_size := out_channels * out_h * out_w) != dest.num_in:
+        if (_output_size := o_ch * out_h * out_w) != dest.num_in:
             raise ShapeError(
-                f"output size mismatch: {_output_size} ({out_channels}*{out_h}*{out_w}) "
+                f"output size mismatch: {_output_size} ({o_ch}*{out_h}*{out_w}) "
                 f"!= {dest.num_in}."
             )
 
@@ -368,29 +370,25 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,H
-        out_channels, group_in_channels, kernel_h = _kernel.shape
-        in_channels = groups * group_in_channels
+        o_ch, grp_in_ch, kernel_h = _kernel.shape
         # I,H
         assert len(source.shape_out) == 2
         in_ch, in_h = source.shape_out
         out_h = (in_h + 2 * padding[0] - kernel_h) // stride[0] + 1
 
-        if in_ch != in_channels:
-            raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
+        if in_ch != (_cur_in_ch := groups * grp_in_ch):
+            in_ch_mismatch_text = f"input channels mismatch: {in_ch} != {_cur_in_ch}"
+            in_ch_mismatch_text += f" ({groups}*{grp_in_ch})." if groups > 1 else "."
+            raise ShapeError(in_ch_mismatch_text)
 
-        if (_output_size := out_channels * out_h) != dest.num_in:
+        if (_output_size := o_ch * out_h) != dest.num_in:
             raise ShapeError(
-                f"output size mismatch: {_output_size} ({out_channels}*{out_h}) "
+                f"output size mismatch: {_output_size} ({o_ch}*{out_h}) "
                 f"!= {dest.num_in}."
             )
 
         self.comm = Conv2dSemiFoldedForward(
-            (in_ch, in_h),
-            (out_channels, out_h),
-            _kernel,
-            stride,
-            padding,
-            groups=groups,
+            (in_ch, in_h), (o_ch, out_h), _kernel, stride, padding, groups=groups
         )
 
 
@@ -422,7 +420,7 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,L
-        out_channels, in_channels, kernel_l = _kernel.shape
+        o_ch, in_channels, kernel_l = _kernel.shape
         # C,L
         in_ch, in_l = _fm_ndim1_check(source.shape_out, "CL")
         out_l = (
@@ -436,11 +434,11 @@ def __init__(
         if in_ch != in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
-        if (_output_size := out_channels * out_l) != dest.num_in:
+        if (_output_size := o_ch * out_l) != dest.num_in:
             raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
         self.comm = ConvTranspose1dForward(
-            (in_l,), (out_l,), _kernel, stride, padding, output_padding
+            (in_l,), (out_l,), _kernel, stride, padding, output_padding=output_padding
         )
 
 
@@ -472,7 +470,7 @@ def __init__(
             _kernel = kernel.copy()
 
         # O,I,H,W
-        out_channels, in_channels, kernel_h, kernel_w = _kernel.shape
+        o_ch, in_channels, kernel_h, kernel_w = _kernel.shape
         # C,H,W
         in_ch, in_h, in_w = _fm_ndim2_check(source.shape_out, "CHW")
         out_h = (
@@ -493,11 +491,16 @@ def __init__(
         if in_ch != in_channels:
             raise ShapeError(f"input channels mismatch: {in_ch} != {in_channels}.")
 
-        if (_output_size := out_channels * out_h * out_w) != dest.num_in:
+        if (_output_size := o_ch * out_h * out_w) != dest.num_in:
             raise ShapeError(f"output size mismatch: {_output_size} != {dest.num_in}.")
 
         self.comm = ConvTranspose2dForward(
-            (in_h, in_w), (out_h, out_w), _kernel, stride, padding, output_padding
+            (in_h, in_w),
+            (out_h, out_w),
+            _kernel,
+            stride,
+            padding,
+            output_padding=output_padding,
         )
 
 
diff --git a/paibox/components/synapses/transforms.py b/paibox/components/synapses/transforms.py
index da334afd..860aaeab 100644
--- a/paibox/components/synapses/transforms.py
+++ b/paibox/components/synapses/transforms.py
@@ -334,15 +334,15 @@ def __init__(
         kernel: np.ndarray,
         stride: _SizeAnyType = 0,
         padding: _SizeAnyType = 0,
-        output_padding: _SizeAnyType = 0,
         groups: int = 1,
+        output_padding: _SizeAnyType = 0,
     ) -> None:
         self.in_shape = in_shape
         self.out_shape = out_shape
         self.stride = stride
         self.padding = padding
-        self.output_padding = output_padding
         self.groups = groups
+        self.output_padding = output_padding
 
         super().__init__(kernel)
 
@@ -414,7 +414,6 @@ def connectivity(self):
 
 
 class Conv2dSemiFoldedForward(_ConvNdForward):
-
     in_shape: Size2Type
     out_shape: Size2Type
     stride: Size2Type
@@ -441,10 +440,11 @@ class ConvTranspose1dForward(_ConvNdForward):
     out_shape: Size1Type
     stride: Size1Type
     padding: Size1Type
+    groups: int
     output_padding: Size1Type
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
-        cin = self.weights.shape[1]
+        cin = self.weights.shape[1] * self.groups
 
         # if self.fm_order == "LC":
         #     # (N,) -> (L, C) -> (C, L)
@@ -478,10 +478,11 @@ class ConvTranspose2dForward(_ConvNdForward):
     out_shape: Size2Type
     stride: Size2Type
     padding: Size2Type
+    groups: int
     output_padding: Size2Type
 
     def __call__(self, x: NeuOutType, *args, **kwargs) -> SynOutType:
-        cin = self.weights.shape[1]
+        cin = self.weights.shape[1] * self.groups
 
         # if self.fm_order == "HWC":
         #     # (N,) -> (H, W, C) -> (C, H, W)
diff --git a/tests/components/synapses/test_transforms.py b/tests/components/synapses/test_transforms.py
index b5a3f937..48807836 100644
--- a/tests/components/synapses/test_transforms.py
+++ b/tests/components/synapses/test_transforms.py
@@ -553,7 +553,7 @@ def test_ConvTranspose1dForward(
             + output_padding[0],
         )
         f = tfm.ConvTranspose1dForward(
-            in_shape, out_shape, kernel, stride, padding, output_padding
+            in_shape, out_shape, kernel, stride, padding, output_padding=output_padding
         )
 
         xf = x.ravel()
@@ -713,7 +713,7 @@ def test_ConvTranspose2dForward(
         )
 
         f = tfm.ConvTranspose2dForward(
-            in_shape, out_shape, kernel, stride, padding, output_padding
+            in_shape, out_shape, kernel, stride, padding, output_padding=output_padding
         )
 
         x = np.random.randint(0, 2, size=fm_shape, dtype=np.bool_)

From 1aa21fd6192c7ef63c66e210e19d32afa533eea0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 16 Dec 2024 07:11:42 +0000
Subject: [PATCH 183/187] :rotating_light: auto fix by pre-commit hooks

---
 paibox/components/synapses/conv_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paibox/components/synapses/conv_utils.py b/paibox/components/synapses/conv_utils.py
index 216dd052..7c8c996c 100644
--- a/paibox/components/synapses/conv_utils.py
+++ b/paibox/components/synapses/conv_utils.py
@@ -630,7 +630,7 @@ def _convtranspose2d_faster(
     ]
     # output_padding
     out = np.pad(out, ((0, 0), (0, output_padding[0]), (0, output_padding[1])))
-    
+
     if bias:
         _bias = bias.squeeze()
         assert _bias.shape == (cout,)

From 79890e4a8fb80399ac02aa8152724b584512f959 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 16 Dec 2024 15:27:39 +0800
Subject: [PATCH 184/187] =?UTF-8?q?=F0=9F=8E=A8=20format(mapper):=20format?=
 =?UTF-8?q?=20improved?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paibox/backend/mapper.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/paibox/backend/mapper.py b/paibox/backend/mapper.py
index e0bfb715..fb358726 100644
--- a/paibox/backend/mapper.py
+++ b/paibox/backend/mapper.py
@@ -130,31 +130,30 @@ def compile(
         """Compile the network with optimization options.
 
         Args:
-            - weight_bit_optimization: whether to optimize weight precision. For example, weights declared as   \
-                INT8 are treated as smaller precision based on their actual values (when the weight are all     \
+            weight_bit_optimization (bool): whether to optimize weight precision. For example, weights declared \
+                as INT8 are treated as smaller precision based on their actual values (when the weights are all \
                 between [-8, 7], they can be treated as INT4). By default, it is specified by the corresponding \
                 compile option in the backend configuration item. Default is true.
-            - grouping_optim_target: specify the optimization goal of neuron grouping, which can be `latency`,  \
-                `core` or `both`, which respectively represent the optimization goal of delay/throughput,       \
-                occupied cores, or both. The default is specified by the corresponding compilation option in the\
-                backend configuration item. Default is 'both'.
-            - no_twisted_branch (for advanced use): when parsing the network topology, whether or not to prohibit intersecting     \
-                branch structures will cause such structures to be processed. For example:
+            grouping_optim_target ("latency", "core", "both"): specify the optimization goal of neuron grouping,\
+                which can be `latency`, `core` or `both` which respectively represent the optimization goal of  \
+                delay/throughput, occupied cores, or both. The default is specified by the corresponding        \
+                compilation option in the backend configuration item. Default is 'both'.
+            no_twisted_branch (bool): only for advanced use. Whether to prohibit intersecting branch structures \
+                when parsing the network topology. Enabling it will cause such structures to be processed.      \
+                For example:
 
                 I -> A -> B -> C
                        ------>
 
-                The out-degree of node A is > 1, and its successor node C has an in-degree > 1. If `no_twisted_branch`    \
-                is true, A will be copied & denoted as A', whose forward connection is preserved.
+                The out-degree of node A is > 1, and its successor node C has an in-degree > 1. If true, A will \
+                be copied & denoted as A', whose forward connection is preserved.
 
                 I -> A -> B -> C
                   -> A'------>
 
-                Default is false.
-
-            - multicast_optim (in dev): whether to perform multicast optimization. If true, the optimization is \
-                performed on all nodes in the network. If a node list is passed, the optimization is attempted  \
-                on the specified nodes only. Default is false.
+            multicast_optim (bool, Sequence[NodeType]): whether to perform multicast optimization. If true, the \
+                optimization is performed on all nodes in the network. If passing a node list, the optimization \
+                is attempted on the specified nodes only. Default is false.
                 TODO A description of it is to be added
 
         Return: network information after compilation in dictionary format.

From 545689336c1167a362f18ad0bca92e0721f1aeb2 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 16 Dec 2024 15:30:44 +0800
Subject: [PATCH 185/187] =?UTF-8?q?=F0=9F=93=9D=20docs:=20update=20the=20g?=
 =?UTF-8?q?uide=20&=20changelog=20for=20`v1.2.0`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md            |  10 +++-
 docs/Guide-of-PAIBox.md | 130 +++++++++++++++++++++++++++++-----------
 2 files changed, 103 insertions(+), 37 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5473a4ee..39c02c7a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -66,7 +66,7 @@
 
   1. 子网络现在直接在主网络内部 `self.subnet=...` 例化即可
   2. 编译选项现在直接通过 `paibox.Mapper.compile(...)` 传入，默认配置不变
-  3. 在 `paibox.Mapper.export()` 中使用 `split_by_chip` 指定配置帧文件是否以芯片分割，默认不分割。原 `split_by_coord` 弃用
+  3. 在 `paibox.Mapper.export()` 中通过 `split_by_chip` 选项指定配置帧文件是否以芯片分割，默认不分割。原 `split_by_coord` 选项移除
 
 ## v1.1.1
 
@@ -82,3 +82,11 @@
 - 提高 `paicorelib` 依赖版本至 `>=1.3.1`
 - 支持1D脉冲平均/最大池化算子
 - 重构路由算法，支持嵌套路由
+
+## v1.2.0
+
+- 支持 ANN 模式下半折叠算子的构建与部署
+- 支持分组卷积，包括 SNN、全展开形式与半折叠形式卷积
+- 优化后端构建网络的流程
+- 修复在编译时开启 `core_estimate_only` 选项后，导出时会报错的问题。现在开启此选项后将无法导出
+- 修复了编译后网络的属性 `inherent_timestep` （即第一次输出有效计算结果的时刻）错误的计算方法。现在将通过标注数据流格式的方式计算得到该属性
diff --git a/docs/Guide-of-PAIBox.md b/docs/Guide-of-PAIBox.md
index 9c648cce..06db932b 100644
--- a/docs/Guide-of-PAIBox.md
+++ b/docs/Guide-of-PAIBox.md
@@ -6,7 +6,7 @@
 python = "^3.9"
 pydantic = "^2.0.3"
 numpy = "^1.26.0"
-paicorelib = "~1.3"
+paicorelib = ">=1.3.1"
 ```
 
 可选依赖：
@@ -59,7 +59,7 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Tr
 
 - `shape`：代表神经元组的尺寸，其形式可以是整形标量、元组或列表。
 - `threshold`：神经元阈值，其形式为整数。
-- `reset_v`：神经元的复位电位，可选参数。当指定时，神经元在发放后，进行硬复位( `v = resetv` )；当未指定时，进行软复位( `v -= pos_threshold` )。默认进行软复位。
+- `reset_v`：神经元的复位电位，可选参数。当指定时，神经元在发放后，进行硬复位( `v=resetv` )；当未指定时，进行软复位( `v-=pos_thres` )。默认进行软复位。
 - `neg_threshold`：负阈值，神经元膜电位所允许的最小值，必须是非正整数。当未指定时，默认为硬件所允许的最小负整数。
 - `delay`：设定神经元输出的延迟。默认为1，即本时间步的计算结果，**下一时间步**传递至后继节点。
 - `tick_wait_start`：设定神经元启动时间。神经元将在第 `T` 个时间步时启动。0表示不启动。默认为1。
@@ -107,7 +107,7 @@ n2 = pb.LIF(shape=128, threshold=10, reset_v=1, bias=-1, name='n2')
 ```
 
 - `leak_v`：泄露，有符号数。
-- `bias`：偏置，有符号数。神经元将**在阈值比较前泄露**，从而实现“偏置”的效果。 `bias` 与 `leak_v` 效果将叠加。支持数组形式的偏置，这通常用于实现卷积的分通道偏置，偏置的尺寸应与神经元尺寸相关，这取决于偏置的实际含义：可以为标量（例如，线性层的偏置）、`(C,)` 数组（其中 `C` 为通道数）或 `(C,H,W)` 数组。
+- `bias`：偏置，有符号数。神经元将**在阈值比较前泄露**，从而实现“偏置”的效果。`bias` 与 `leak_v` 效果将叠加。支持数组形式的偏置，这通常用于实现卷积的分通道偏置，偏置的尺寸应与神经元尺寸相关，这取决于偏置的实际含义：可以为标量、`(C,)` 数组（其中 `C` 为通道数或输出特征数）或与本层神经元尺寸相同的数组。
 - 其他参数含义与 IF 相同。
 
 #### Tonic Spiking
@@ -214,7 +214,7 @@ s1= pb.FullConn(source=n1, dest=n2, weights=weight1, conn_type=pb.SynConnType.Al
 
   其权重以标量的形式储存。
 
-- 数组：尺寸要求为 `(N2,)`，可以自定义每组对应神经元之间的连接权重。如下例所示，设置 `weights` 为 `[1, 2, 3, 4, 5]`，
+- 数组：尺寸要求为 `(N2,)`，可以自定义每组对应神经元之间的连接权重。如下例所示，设置 `weights` 为 `[1,2,3,4,5]`，
 
   ```python
   n1 = pb.IF(shape=5, threshold=1)
@@ -234,16 +234,16 @@ s1= pb.FullConn(source=n1, dest=n2, weights=weight1, conn_type=pb.SynConnType.Al
 
 ##### Identity 恒等映射
 
-具有缩放因子的单对单连接，即 `One2One` 中权重项为标量的特殊情况。
+具有标量缩放因子的单对单连接，即 `One2One` 中权重项为标量的特殊情况。
 
 #### 2D矩阵乘法 MatMul2d
 
 专门用于表示二维矩阵乘法， $y=x\cdot w$ 或 $y=x^T\cdot w$
 
-- 例如，输入尺寸为 `(n, k)` ，权重尺寸为 `(k, m)`，输出尺寸为 `(n, m)`
-- 当输入尺寸为 `(k, n)` 时，会**自动进行转置**
+- 例如，输入尺寸为 `(n,k)` ，权重尺寸为 `(k,m)`，输出尺寸为 `(n,m)`
+- 当输入尺寸为 `(k,n)` 时，会**自动进行转置**
 - 输入维度最大为2维
-- 当输入维度小于2维，将自动补齐，即 `(N, )` 补齐为 `(1, N)`
+- 当输入维度小于2维，将自动补齐，即 `(N,)` 补齐为 `(1,N)`
 
 ```python
 n1 = pb.IF(shape=(8, 16), threshold=1)
@@ -251,14 +251,14 @@ n2 = pb.IF(shape=(8, 10), threshold=1)
 s1 = pb.MatMul2d(source=n1, dest=n2, weights=np.ones((16, 10), dtype=np.int8))
 ```
 
-⚠️ 不要与 `FullConn` 混淆。`FullConn` 需要传入 `N*M` 矩阵，其中 `N` 为前向神经元组数目，`M` 为后向神经元组数目。而 `MatMul2d` 中传入的矩阵尺寸并非 `N*M` ，它最终将展开为 `N*M` 矩阵。如下式所示，由于输入/输出数据在芯片中只能以一维表示，因此，它在芯片中的实现为：
+⚠️ 不要与 `FullConn` 混淆。`FullConn` 需要传入 $N*M$ 矩阵，其中 $N$ 为前向神经元组数目，$M$ 为后向神经元组数目。而 `MatMul2d` 中传入的矩阵尺寸并非 $N*M$ ，它最终将展开为 $N*M$ 矩阵。如下式所示，由于输入/输出数据在芯片中只能以一维表示，因此，它在芯片中的实现为：
 
 $$
 \begin{bmatrix}x_{11}& x_{12}& x_{13}\\ x_{21}& x_{22}& x_{23}\end{bmatrix}\cdot\begin{bmatrix}w_{11}& w_{12}\\ w_{21}& w_{22}\\ w_{31}& w_{32}\end{bmatrix}=\begin{bmatrix}y_{11}& y_{12}\\ y_{21}& y_{22}\end{bmatrix}
 $$
 
 $$
-\begin{bmatrix}x_{11}\\ x_{12}\\ x_{13}\\ x_{21}\\ x_{22}\\ x_{23}\end{bmatrix}^T\cdot\begin{bmatrix}w_{11}& w_{12}& 0& 0\\ w_{21}& w_{22}& 0& 0\\ w_{31}& w_{32}& 0& 0\\0& 0& w_{11}& w_{12}\\0& 0& w_{21}& w_{22}\\0& 0& w_{31}& w_{32}\end{bmatrix}=\begin{bmatrix}y_{11}\\ y_{12}\\ y_{21}\\ y_{22}\end{bmatrix}^T
+\begin{bmatrix}x_{11}\\ x_{12}\\ x_{13}\\ x_{21}\\ x_{22}\\ x_{23}\end{bmatrix}^T\cdot\begin{bmatrix}w_{11}& w_{12}& 0& 0\\ w_{21}& w_{22}& 0& 0\\ w_{31}& w_{32}& 0& 0\\ 0& 0& w_{11}& w_{12}\\ 0& 0& w_{21}& w_{22}\\ 0& 0& w_{31}& w_{32}\end{bmatrix}=\begin{bmatrix}y_{11}\\ y_{12}\\ y_{21}\\ y_{22}\end{bmatrix}^T
 $$
 
 对于 $y=x^T\cdot w$，将转置作用于 $w$ 即可等效实现。例如：
@@ -268,7 +268,7 @@ $$
 $$
 
 $$
-\begin{bmatrix}x_{11}\\x_{12}\\x_{21}\\x_{22}\\x_{31}\\x_{32}\end{bmatrix}^T\cdot\begin{bmatrix}w_{11}& w_{12}& 0& 0\\0& 0& w_{11}& w_{12}\\ w_{21}& w_{22}& 0& 0\\ 0& 0& w_{21}& w_{22}\\ w_{31}& w_{32}& 0& 0\\0& 0& w_{31}& w_{32}\end{bmatrix}=\begin{bmatrix}y_{11}\\ y_{12}\\ y_{21}\\ y_{22}\end{bmatrix}^T
+\begin{bmatrix}x_{11}\\ x_{12}\\ x_{21}\\ x_{22}\\ x_{31}\\ x_{32}\end{bmatrix}^T\cdot\begin{bmatrix}w_{11}& w_{12}& 0& 0\\ 0& 0& w_{11}& w_{12}\\ w_{21}& w_{22}& 0& 0\\ 0& 0& w_{21}& w_{22}\\ w_{31}& w_{32}& 0& 0\\ 0& 0& w_{31}& w_{32}\end{bmatrix}=\begin{bmatrix}y_{11}\\ y_{12}\\ y_{21}\\ y_{22}\end{bmatrix}^T
 $$
 
 #### 1D卷积
@@ -277,7 +277,7 @@ $$
 
 - `kernel`：卷积核权重。
 - `stride`：步长，标量。默认为1。
-- `padding`：填充，标量。
+- `padding`：填充，标量。默认为0。
 - `kernel_order`：指定卷积核维度顺序为 `OIL` 或 `IOL` 排列。默认为 `OIL`。
 - 神经元维度顺序仅支持 `CL`。
 
@@ -294,8 +294,8 @@ conv1d = pb.Conv1d(n1, n2, kernel=kernel, stride=1, padding=0, kernel_order="OIL
 全展开形式2D卷积为全连接突触的一种特殊表达。需**严格指定**输入神经元的尺寸与维度、卷积核权重、卷积核维度顺序与步长。对于输出神经元的具体尺寸不做严格要求。
 
 - `kernel`：卷积核权重。
-- `stride`：步长，标量或元组格式。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。默认为1。
-- `padding`：填充，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
+- `stride`：步长，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为1。
+- `padding`：填充，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为0。
 - `kernel_order`：指定卷积核维度顺序为 `OIHW` 或 `IOHW` 排列。默认为 `OIHW`。
 - 神经元维度顺序仅支持 `CHW`。
 
@@ -334,9 +334,9 @@ convt1d = pb.ConvTranspose1d(n1, n2, kernel=kernel, stride=1, padding=0, output_
 全展开形式2D转置卷积为全连接突触的一种特殊表达。需**严格指定**输入神经元的尺寸与维度、卷积核权重、卷积核维度顺序与步长。对于输出神经元的具体尺寸不做严格要求。
 
 - `kernel`：卷积核权重。
-- `stride`：步长，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
-- `padding`：填充，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
-- `output_padding`：对输出特征图的一侧进行额外的填充，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
+- `stride`：步长，可以为标量或元组。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
+- `padding`：填充，可以为标量或元组。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
+- `output_padding`：对输出特征图的一侧进行额外的填充，可以为标量或元组。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
 - `kernel_order`：指定卷积核维度顺序为 `OIHW` 或 `IOHW` 排列。
 - 神经元维度顺序仅支持 `CHW`。
 - 参数详细含义参见：[pytorch/ConvTranspose2d](https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html#torch.nn.ConvTranspose2d)
@@ -401,8 +401,8 @@ for t in range(20):
 其中，
 
 - `kernel`：卷积核权重。
-- `stride`：步长，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
-- `padding`：对输入进行填充，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
+- `stride`：步长，可以为标量或元组。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
+- `padding`：对输入进行填充，可以为标量或元组。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
 - `kernel_order`：指定卷积核维度顺序为 `OIHW` 或 `IOHW` 排列。
 - `tau`：膜电位时间常数。
 - `decay_input`：输入是否也会参与衰减。
@@ -645,8 +645,8 @@ class Net(pb.DynSysGroup):
 - `neuron_a`：第一个操作数。
 - `neuron_b`：第二个操作数。
 - `delay`：设定模块输出的延迟。默认为1，即本时间步的计算结果，**下一时间步**传递至后继节点。
-- `tick_wait_start`：设定模块启动时间。模块将在第 `T` 个时间步时启动。0表示不启动。默认为1。
-- `tick_wait_end`：设定模块持续工作时长。模块将持续工作 `T` 个时间步。0表示**持续工作**。默认为0。
+- `tick_wait_start`：设定模块启动时刻。模块将在第 `T` 个时间步时启动。0表示不启动。默认为1。
+- `tick_wait_end`：设定模块**持续工作**时长。模块将持续工作 `T` 个时间步。0表示**持续工作**。默认为0。
 - `keep_shape`：是否在仿真记录数据时保持尺寸信息，默认为 `False`。实际进行运算的尺寸仍视为一维。
 - `name`：模块的名称。可选参数。
 
@@ -668,14 +668,14 @@ s3 = pb.FullConn(p2d, n2, conn_type=pb.SynConnType.One2One)
 其中：
 
 - `neuron`：待池化的神经元。
-- `kernel_size`：池化窗口的尺寸，标量或元组格式。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。
+- `kernel_size`：池化窗口的尺寸，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
 - `stride`：步长，可选参数，标量或元组格式，默认为 `None`，即池化窗口的尺寸。
-- `padding`：填充，可以为标量或元组。当为标量时，对应为 `(x, x)`；当为元组时，则对应为 `(x, y)`。默认为0。
+- `padding`：填充，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为0。
 - 神经元维度顺序仅支持 `CHW`。
 
 对于平均池化 `SpikingAvgPool2d`，它还有如下参数可配置：
 
-- `threshold`：平均池化的比较阈值，芯片需要通过神经元的阈值比较间接地实现除法。当不指定时，阈值为 $\text{round}(\text{kernel\_size}/2)$。池化窗口的输入做累加后与该阈值进行比较，可等价于平均池化的操作，即 $o_j=\sum^{k-1}_{i=0}x_{ij} \ge V_{th,pos}$，其中 $k$ 为池化窗口尺寸，$x_{ij}$ 为每个池化窗口内的输入特征图元素，$o_j$ 为第 $j$ 个输出特征图元素。
+- `threshold`：平均池化的比较阈值，芯片需要通过神经元的阈值比较间接地实现除法。当不指定时，阈值为 $\text{round}(\text{ksize}/2)$。池化窗口的输入做累加后与该阈值进行比较，可等价于平均池化的操作，即 $o_j=\sum^{k-1}_{i=0}x_{ij} \ge V_{th,pos}$，其中 $k$ 为池化窗口尺寸，$x_{ij}$ 为每个池化窗口内的输入特征图元素，$o_j$ 为第 $j$ 个输出特征图元素。
 
 ### 2D平均池化（膜电位相关）
 
@@ -709,7 +709,7 @@ $$
 V_{pre} + a\cdot f_a + b\cdot f_b \ge V_{th,pos}
 $$
 
-对于 `SpikingSub`，$f_a=1$，$f_b=-1$.
+对于 `SpikingSub`， $f_a=1$， $f_b=-1$。
 
 ```python
 n1 = pb.IF((10,), 1, 0, delay=1, tick_wait_start=1)
@@ -722,10 +722,9 @@ sub1 = pb.SpikingSub(n1, n2, overflow_strict=False, delay=1, tick_wait_start=2)
 
 - `neuron_a`：第一个操作数。
 - `neuron_b`：第二个操作数。在减法中作被减数。
-- `factor_a`：第一个操作数的缩放因子，正整数标量。默认为1，仅在 `SpikingAdd` 中使用。
-- `factor_b`：第一个操作数的缩放因子，正整数标量。默认为1，仅在 `SpikingAdd` 中使用。
+- `factor_a/b`：第一/二个操作数的缩放因子，正整数标量。默认为1，仅在 `SpikingAdd` 中使用。
 - `pos_thres`：正阈值。默认为1，仅在 `SpikingAdd` 中使用。
-- `reset_v`：复位电位，可选参数。当指定时，神经元在发放后，进行硬复位( `v = resetv` )；当未指定时，进行软复位( `v -= pos_threshold` )。默认进行软复位，仅在 `SpikingAdd` 中使用。
+- `reset_v`：复位电位，可选参数。当指定时，神经元在发放后，进行硬复位( `v=resetv` )；当未指定时，进行软复位( `v-=pos_thres` )。默认进行软复位，仅在 `SpikingAdd` 中使用。
 - `overflow_strict`：是否严格检查运算结果溢出。如果启用，则在仿真中，当脉冲加、减运算结果溢出时将报错。默认为 `False`。
 
 ### 2D/3D转置
@@ -747,6 +746,65 @@ t3d = pb.Transpose3d(n2, axes=(1, 2, 0), tick_wait_start=2)
 - `neuron`：待转置其输出脉冲的神经元。对于二维转置，支持输入尺寸为1或2维；对于三维转置，支持输入尺寸为2或3维。尺寸不足时，自动补1。
 - `axes`：（仅三维转置）如果指定，则必须是包含 `[0,1,…,N-1]` 排列的元组或列表，其中 `N` 是矩阵的轴（维度）数。返回数组的第 `i` 轴将对应于输入的编号为 `axes[i]` 的轴。若未指定，则默认为 `range(N)[::-1]`，这将反转轴的顺序。具体参数含义参见：[numpy.transpose](https://numpy.org/doc/1.26/reference/generated/numpy.transpose.html#numpy.transpose)
 
+### 线性层
+
+适用于 ANN 的线性层。
+
+```python
+n1 = pb.ANNNeuron((1024,))
+l1 = pb.Linear(n1, 10, w, bias=10, bit_trunc=8)
+```
+
+其中：
+
+- `neuron_s`：输入特征图（神经元）。
+- `out_features`：输出特征，可以理解为输出神经元。
+- `weights`：权重矩阵。
+- `bias`：偏置，有符号数。可以为标量或 `(out_features,)` 数组。
+- `bit_trunc`：神经元输出的8位无符号数的截断位置。默认为8，即截取 [7:0] 位。
+
+### 半折叠形式算子
+
+以下算子仅适用于ANN，且对数据流形式有严格要求，因此要求神经网络中的所有算子均为半折叠形式。
+
+当神经网络采用半折叠形式时，对于尺寸为 `(C,H,W)` 的特征图，将展开为 `W*(C,H)` 的形式输入，即对于一张特征图需要 `W` 个时间步完成输入（`H` 与 `W` 地位相同，可以互换）。在例化半折叠形式的算子时，卷积核的尺寸依然为（本层的） `(O,I,K,K)`，然而中间特征图的尺寸却减小为 `(C,H)`，`W` 维度被折叠。这显著减少了芯片内所需存储的中间特征图尺寸。作为代价，半折叠形式的卷积（类）算子需至少 `Ow` 个时间步才能完全输出，其中 `Ow` 为本层的输出特征图宽度。这使得网络模型的推理（得到第一次有效输出数据的）耗时增加。
+
+#### 半折叠2D卷积
+
+- `neuron_s`：输入特征图（神经元），要求为半折叠算子或输入节点。
+- `kernel`：卷积核权重，维度顺序为 `OIHW`。
+- `stride`：步长，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为1。
+<!-- - `padding`：填充，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为0。 -->
+- `bias`：偏置，有符号数。可以为标量或 `(C,)` 数组。默认为0。
+- `bit_trunc`：神经元输出的8位无符号数的截断位置。默认为8，即截取 [7:0] 位。
+
+#### 半折叠2D最大池化
+
+- `neuron_s`：输入特征图（神经元），要求为半折叠算子或输入节点。
+- `kernel_size`：池化窗口的尺寸，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
+- `stride`：步长，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为 `None`，即池化窗口的尺寸。
+- `bit_trunc`：神经元输出的8位无符号数的截断位置。默认为8，即截取 [7:0] 位。
+
+⚠️ 半折叠最大池化不支持 padding。
+
+#### 半折叠2D平均池化
+
+- `neuron_s`：输入特征图（神经元），要求为半折叠算子或输入节点。
+- `kernel_size`：池化窗口的尺寸，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。
+- `stride`：步长，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为 `None`，即池化窗口的尺寸。
+<!-- - `padding`：填充，标量或元组格式。当为标量时，对应为 `(x,x)`；当为元组时，则对应为 `(x,y)`。默认为0。 -->
+- `bit_trunc`：神经元输出的8位无符号数的截断位置。默认为 `8+ksize.bit_length()-1`，其中 `ksize` 为池化窗口的尺寸。注意，平均池化依赖除法实现，而芯片计算核只能通过右移位实现除以2的整数幂的运算。因此，当池化窗口尺寸不为2的整数幂时，只能通过上式近似地（有损）计算除法。例如，当池化窗口为 (3,3) 时，最终将 /8，而非 /9。这样的近似误差可以考虑在量化阶段，通过将后续层的权重 $w$ 乘以 $8/9$ 来减小。
+
+#### 半折叠线性层
+
+半折叠线性层接受的输入为半折叠形式，而其计算结果在单一时间步上输出。即它将半折叠形式的数据流转换为全展开形式的数据流。
+
+- `neuron_s`：输入特征图（神经元），要求为半折叠算子或输入节点。
+- `out_features`：输出特征，可以理解为输出神经元。
+- `weights`：权重矩阵。
+- `bias`：偏置，有符号数。可以为标量或 `(out_features,)` 数组。
+- `bit_trunc`：神经元输出的8位无符号数的截断位置。默认为8，即截取 [7:0] 位。
+
 ## 网络模型
 
 在 PAIBox 中，可以通过继承 `DynSysGroup`（或 `Network`）来实现，并在其中例化基础组件与功能模块，完成网络模型的构建。以一个简单的两层全连接网络为例：
@@ -915,14 +973,14 @@ sim.reset()
 
 调用 `run` 运行仿真，其中：
 
-- `duration`：指定仿真时间步长。请注意，仿真时需要计算网络的最长路径(delay)，并计入仿真步长中以获取有效的输出。
+- `duration`：指定仿真时间步长。请注意，仿真步长需要大于网络模型的有效层数，才会得到并记录有效的仿真数据。
 - `reset`：是否对网络模型中组件进行复位。默认为 `False`。这可实现在一次仿真的不同时间步，输入不同的数据。
 
 ## 编译、映射与导出
 
 模型映射将完成网络拓扑解析、分割、路由坐标分配、配置信息与帧文件导出等一系列工作。
 
-例化 `Mapper`，传入所构建的网络模型，编译，最后导出。
+例化 `Mapper`，传入所构建的网络模型，构建、编译、导出。
 
 ```python
 mapper = pb.Mapper()
@@ -939,19 +997,19 @@ mapper.clear()
 
 其中，编译时有如下参数可指定：
 
-- `core_estimate_only`：仅导出预估所需核数目，不进行后续部署。默认关闭。
-- `weight_bit_optimization`: 是否对权重精度进行优化处理。这将使得声明时为 INT8 的权重根据实际值当作更小的精度处理（当权重的值均在 [-8, 7] 之间，则可当作 INT4 进行处理）。默认开启。
+- `core_estimate_only`：仅导出预估所需核数目，不进行后续部署。默认关闭。当启用此项时，编译工作未全部进行，因此无法导出任何信息。
+- `weight_bit_optimization`：是否对权重精度进行优化处理。这将使得声明时为 INT8 的权重根据实际值当作更小的精度处理。例如，当权重的值均在 [-8, 7] 之间，则可当作 INT4 进行处理。默认开启。
 - `grouping_optim_target`：指定神经元分组的优化目标，可以为 `"latency"`，`"core"` 或 `"both"`，分别代表以延时/吞吐率、占用核资源为优化目标、或二者兼顾。默认 `both`。
-- 同时，该方法将返回字典形式的编译后网络的信息。
+- 返回值：该方法将以字典形式返回编译后网络的信息。
 
 导出时有如下参数可指定：
 
 - `write_to_file`: 是否将配置帧导出为文件。默认为 `True`。
 - `fp`：导出目录。若未指定，则默认为后端配置选项 `build_directory` 所设置的目录（当前工作目录）。
 - `format`：导出交换文件格式，可以为 `bin`、`npy` 或 `txt`。默认为 `bin`。
-- `split_by_chip`：是否将配置帧以芯片坐标进行分割，由此生成的配置帧文件命名形如"config_chip0_core0"、"config_chip0_core1"、"config_chip1_core0"。默认为 `False`，即最终导出为一个文件 "config_all"。
+- `split_by_chip`：是否将配置帧以芯片坐标进行分割，由此生成的配置帧文件命名形如"config_chip0_core0.format"、"config_chip0_core1.format"、"config_chip1_core0.format"。默认为 `False`，即最终导出为一个文件 "config_all.format"。
 - `export_core_params`：是否导出实际使用核参数至 json 文件，以直观显示实际使用核的配置信息。默认为 `False`。
-- `export_clk_en_L2`：是否导出L2簇时钟串口数据。默认为 `False`。
+- `export_clk_en_L2`：是否导出 L2 簇时钟串口数据。默认为 `False`。硬件平台可根据该数据关闭芯片其他未使用的 L2 簇时钟以降低功耗。
 - `use_hw_sim`：是否使用硬件仿真器。若使用，将额外导出 `bin` 格式的配置帧文件。默认为 `True`。
 
 同时，该方法将返回模型的配置项字典 `GraphInfo`，包括：
@@ -959,7 +1017,7 @@ mapper.clear()
 - `input`：输入节点信息字典。
 - `output`：输出目的地信息字典。
 - `members`：中间层所在物理核的配置项字典。
-- `inherent_timestep`：网络的最长时间步。
+- `inherent_timestep`：网络的最长时间步，即得到网络第一个有效输出数据的用时。
 - `n_core_required`：网络**需要**的物理核数目。
 - `n_core_occupied`：网络**实际占用**的物理核数目。
 - `misc`：其他杂项信息。例如，编译后的网络名称；上述L2簇时钟串口数据在该键 `["clk_en_L2"]` 中。

From 43499f6d4b80c42d52c191fe63f683b9482ab6e7 Mon Sep 17 00:00:00 2001
From: KafCoppelia <k740677208@gmail.com>
Date: Mon, 16 Dec 2024 15:30:53 +0800
Subject: [PATCH 186/187] =?UTF-8?q?=F0=9F=94=96=20v1.2.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 08223a17..d08e6666 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "paibox"
-version = "1.2.0a2"
+version = "1.2.0"
 description = "Toolchain of PAICORE 2.0"
 authors = ["Ziru Pan <zrpan@stu.pku.edu.cn>"]
 maintainers = [

From b87de42eed9153be847101ca4b420eccb048c41c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 16 Dec 2024 07:32:10 +0000
Subject: [PATCH 187/187] :rotating_light: auto fix by pre-commit hooks

---
 docs/Guide-of-PAIBox.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Guide-of-PAIBox.md b/docs/Guide-of-PAIBox.md
index 06db932b..9003b4e1 100644
--- a/docs/Guide-of-PAIBox.md
+++ b/docs/Guide-of-PAIBox.md
@@ -767,7 +767,7 @@ l1 = pb.Linear(n1, 10, w, bias=10, bit_trunc=8)
 
 以下算子仅适用于ANN，且对数据流形式有严格要求，因此要求神经网络中的所有算子均为半折叠形式。
 
-当神经网络采用半折叠形式时，对于尺寸为 `(C,H,W)` 的特征图，将展开为 `W*(C,H)` 的形式输入，即对于一张特征图需要 `W` 个时间步完成输入（`H` 与 `W` 地位相同，可以互换）。在例化半折叠形式的算子时，卷积核的尺寸依然为（本层的） `(O,I,K,K)`，然而中间特征图的尺寸却减小为 `(C,H)`，`W` 维度被折叠。这显著减少了芯片内所需存储的中间特征图尺寸。作为代价，半折叠形式的卷积（类）算子需至少  `Ow `个时间步才完全输出，其中 `Ow` 为本层的输出特征图宽度。这使得网络模型的推理（得到第一次有效输出数据的）耗时增加。
+当神经网络采用半折叠形式时，对于尺寸为 `(C,H,W)` 的特征图，将展开为 `W*(C,H)` 的形式输入，即对于一张特征图需要 `W` 个时间步完成输入（`H` 与 `W` 地位相同，可以互换）。在例化半折叠形式的算子时，卷积核的尺寸依然为（本层的） `(O,I,K,K)`，然而中间特征图的尺寸却减小为 `(C,H)`，`W` 维度被折叠。这显著减少了芯片内所需存储的中间特征图尺寸。作为代价，半折叠形式的卷积（类）算子需至少 `Ow` 个时间步才完全输出，其中 `Ow` 为本层的输出特征图宽度。这使得网络模型的推理（得到第一次有效输出数据的）耗时增加。
 
 #### 半折叠2D卷积