Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# Copyright (c) PyPTO Contributors.
# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
# CANN Open Software License Agreement Version 2.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# -----------------------------------------------------------------------------------------------------------
"""Tensormap-and-ringbuffer vector example: f = (a+b+1)*(a+b+2) + (a+b)."""

import torch
from simpler.task_interface import ArgDirection as D

from simpler_setup import SceneTestCase, TaskArgsBuilder, Tensor, scene_test


@scene_test(level=2, runtime="tensormap_and_ringbuffer")
class TestVectorExample(SceneTestCase):
    """Vector scene test: f = (a+b+1)*(a+b+2) + (a+b), where a=2.0, b=3.0 -> f=47.0.

    The computation is split across three AIV kernels orchestrated by an
    AICPU entry point:
      - func_id 0: kernel_add        (a, b)      -> s = a + b
      - func_id 1: kernel_add_scalar (s)         -> s + scalar
      - func_id 2: kernel_mul        (x, y)      -> x * y
    (Kernel roles inferred from file names — the .cpp sources are not
    visible here; confirm against kernels/ if the pipeline changes.)
    """

    # Callable description consumed by the scene-test harness: one AICPU
    # orchestration entry plus three AIV in-core kernels, each declared with
    # its source file, core type, and argument-direction signature.
    CALLABLE = {
        "orchestration": {
            "source": "kernels/orchestration/example_orchestration.cpp",
            "function_name": "aicpu_orchestration_entry",
            "signature": [D.IN, D.IN, D.OUT],
        },
        "incores": [
            {
                "func_id": 0,
                "source": "kernels/aiv/kernel_add.cpp",
                "core_type": "aiv",
                "signature": [D.IN, D.IN, D.OUT],
            },
            {
                "func_id": 1,
                "source": "kernels/aiv/kernel_add_scalar.cpp",
                "core_type": "aiv",
                "signature": [D.IN, D.OUT],
            },
            {
                "func_id": 2,
                "source": "kernels/aiv/kernel_mul.cpp",
                "core_type": "aiv",
                "signature": [D.IN, D.IN, D.OUT],
            },
        ],
    }

    # Test-case matrix: one default case run on simulator and hardware.
    # NOTE(review): a reviewer flagged that block_dim=3 may be redundant for a
    # single 128x128 tile if the orchestration does no cross-block
    # partitioning, and suggested block_dim=1 — verify against
    # example_orchestration.cpp before changing; kept at 3 to preserve the
    # committed behavior.
    CASES = [
        {
            "name": "default",
            "platforms": ["a5sim", "a5"],
            "config": {"aicpu_thread_num": 4, "block_dim": 3},
            "params": {},
        },
    ]

    def generate_args(self, params):
        """Build the task arguments: two constant input tensors and a zeroed output.

        Args:
            params: per-case parameter dict (unused; the inputs are fixed).

        Returns:
            TaskArgsBuilder holding a=2.0, b=3.0 (both flat float32 tensors of
            128*128 elements) and f initialized to zeros.
        """
        size = 128 * 128
        return TaskArgsBuilder(
            Tensor("a", torch.full((size,), 2.0, dtype=torch.float32)),
            Tensor("b", torch.full((size,), 3.0, dtype=torch.float32)),
            Tensor("f", torch.zeros(size, dtype=torch.float32)),
        )

    def compute_golden(self, args, params):
        """Compute the reference result in place: f = (a+b+1)*(a+b+2) + (a+b)."""
        args.f[:] = (args.a + args.b + 1) * (args.a + args.b + 2) + (args.a + args.b)


if __name__ == "__main__":
SceneTestCase.run_module(__name__)
Loading