-
Notifications
You must be signed in to change notification settings - Fork 88
Open
Description
Is there some way to improve the performance of this example:
import numpy as np
import cupynumeric as cpy
from legate.core.task import task, InputStore, OutputStore, ReductionStore, ADD
from legate.core import(
VariantCode,
broadcast,
align,
dimension,
constant,
get_legate_runtime,
LegateDataInterface,
LogicalStore,
get_machine,
TaskTarget,
Machine
)
def get_store(obj: LegateDataInterface) -> LogicalStore:
    """Extract the backing store from a single-field Legate data object.

    Reads ``obj.__legate_data_interface__``, checks that it reports
    interface version 1 and exposes exactly one field, checks that neither
    the field nor its column is nullable, and returns the column's ``data``
    attribute.

    Args:
        obj: Object exposing ``__legate_data_interface__``.

    Returns:
        The ``data`` attribute of the only column in the interface.

    Raises:
        AssertionError: If the version is not 1, the interface does not
            hold exactly one field, or the field/column is nullable.
    """
    interface = obj.__legate_data_interface__
    assert interface["version"] == 1

    fields = interface["data"]
    # Exactly one field is expected in the interface.
    assert len(fields) == 1

    sole_field = next(iter(fields))
    assert not sole_field.nullable

    sole_column = fields[sole_field]
    assert not sole_column.nullable

    return sole_column.data
@task(
    variants=(VariantCode.CPU,)
)
def fill(arr_store: ReductionStore[ADD]):
    """Add one to every element of the array behind ``arr_store``.

    Args:
        arr_store: Store registered with the ADD reduction; it is viewed
            through ``np.asarray`` and updated in place.
            NOTE(review): this relies on ``np.asarray`` returning a writable
            view of the store rather than a copy - confirm against the
            Legate task documentation.
    """
    arr = np.asarray(arr_store)
    # Broadcast a scalar instead of building ``np.ones(arr.shape)``: the
    # result is identical, but no second array of the full shape has to be
    # allocated and filled before the in-place add.
    arr += 1
@task(
    variants=(VariantCode.CPU,)
)
def print_arr(arr_store: InputStore):
    """Print the first entry along the leading axis of the store's array.

    Args:
        arr_store: Input store, viewed as an array through ``np.asarray``.
    """
    leading_entry = np.asarray(arr_store)[0]
    print(leading_entry)
# Driver script: launch `fill` over `cpus` points on one array, then print
# part of the result from a single `print_arr` task.

# Restrict the machine to its CPU target and report the resulting count.
machine = get_machine()
cpus = machine.only(TaskTarget.CPU).count()
print("CPUS:", cpus)
# Zero-initialised cupynumeric array of shape 400 x 1024 x 1024.
arr = cpy.zeros((400, 1024, 1024))
runtime = get_legate_runtime()
# Manual `fill` task with launch shape (cpus,); the array's store is
# registered as an ADD reduction.
# NOTE(review): the store is passed whole (no partition is given here), so
# presumably every point task reduces into the full array - confirm against
# the Legate manual-task documentation, as this is a likely cost driver.
library = fill.library
fill_task = runtime.create_manual_task(library, fill.task_id, (cpus,))
fill_task.add_reduction(get_store(arr), ADD)
fill_task.execute()
# Manual `print_arr` task with launch shape (1,), reading the same store as
# an input. `library` is rebound to the library of `print_arr`.
library = print_arr.library
print_arr_task = runtime.create_manual_task(library, print_arr.task_id, (1,))
print_arr_task.add_input(get_store(arr))
print_arr_task.execute()
There is a profile here: https://legion.stanford.edu/prof-viewer/?url=https://sapling2.stanford.edu/~seshu/legion_prof_legate/