forked from Xilinx/mlir-aie
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatrix_scalar_add.py
74 lines (59 loc) · 2.41 KB
/
matrix_scalar_add.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# matrix_scalar_add/matrix_scalar_add.py -*- Python -*-
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
import numpy as np
import sys
from aie.iron import ObjectFifo, Program, Runtime, Worker
from aie.iron.placers import SequentialPlacer
from aie.iron.device import NPU1Col1, XCVC1902
from aie.iron.controlflow import range_
from aie.helpers.taplib import TensorTiler2D
# Dimensions (rows, columns) of the full matrix handled by the design.
MATRIX_HEIGHT, MATRIX_WIDTH = 16, 128
MATRIX_SHAPE = (MATRIX_HEIGHT, MATRIX_WIDTH)

# Dimensions (rows, columns) of the sub-tile the compute core works on.
TILE_HEIGHT, TILE_WIDTH = 8, 16
TILE_SHAPE = (TILE_HEIGHT, TILE_WIDTH)
def my_matrix_add_one():
    """Describe a design that adds 1 to every element of one matrix tile.

    The target device is chosen from the first command-line argument
    ("npu" or "xcvc1902").  A second argument ("Col") must also be supplied,
    but this function never reads its value.

    Returns:
        The result of resolving the program with a ``SequentialPlacer``
        (per the original comment, an MLIR module).

    Raises:
        ValueError: If the argument count is not exactly two, or the device
            name is not recognised.
    """
    # Program name plus two user arguments.
    argc = len(sys.argv)
    if argc != 3:
        raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")

    device_name = sys.argv[1]
    if device_name == "npu":
        dev = NPU1Col1()
    elif device_name == "xcvc1902":
        dev = XCVC1902()
    else:
        raise ValueError(f"[ERROR] Device name {sys.argv[1]} is unknown")

    # Tensor types: the whole matrix and a single tile, both int32.
    int32_dtype = np.dtype[np.int32]
    matrix_ty = np.ndarray[MATRIX_SHAPE, int32_dtype]
    tile_ty = np.ndarray[TILE_SHAPE, int32_dtype]

    # Object fifos carrying tiles into and out of the worker.
    of_in = ObjectFifo(tile_ty, name="in0")
    of_out = ObjectFifo(tile_ty, name="out0")

    def core_fn(fifo_in, fifo_out):
        # Acquire one input tile and one output tile, write input + 1 into
        # the output element by element, then release both.
        src = fifo_in.acquire(1)
        dst = fifo_out.acquire(1)
        for row in range_(TILE_HEIGHT):
            for col in range_(TILE_WIDTH):
                dst[row, col] = src[row, col] + 1
        fifo_in.release(1)
        fifo_out.release(1)

    # The worker consumes from the input fifo and produces to the output fifo.
    worker_args = [of_in.cons(), of_out.prod()]
    my_worker = Worker(core_fn, fn_args=worker_args)

    # Only the first access pattern produced by the tiler is used, for both
    # the fill and the drain.
    tilings = TensorTiler2D.simple_tiler(MATRIX_SHAPE, TILE_SHAPE)
    first_tap = tilings[0]

    # Runtime sequence: start the worker, push the input, pull the output.
    # The middle tensor argument is declared but not used.
    rt = Runtime()
    with rt.sequence(matrix_ty, matrix_ty, matrix_ty) as (src_tensor, _, dst_tensor):
        rt.start(my_worker)
        rt.fill(of_in.prod(), src_tensor, first_tap)
        rt.drain(of_out.cons(), dst_tensor, first_tap, wait=True)

    # Place the components on the device and resolve the program.
    program = Program(dev, rt)
    return program.resolve_program(SequentialPlacer())
# Build the design and write the resolved program to stdout.
resolved_program = my_matrix_add_one()
print(resolved_program)