Skip to content

Slow all-to-one copies #1232

@syamajala

Description

@syamajala

Is there some way to improve the performance of the following example?

import numpy as np
import cupynumeric as cpy
from legate.core.task import task, InputStore, OutputStore, ReductionStore, ADD
from legate.core import(
    VariantCode,
    broadcast,
    align,
    dimension,
    constant,
    get_legate_runtime,
    LegateDataInterface,
    LogicalStore,
    get_machine,
    TaskTarget,
    Machine
    )

def get_store(obj: LegateDataInterface) -> LogicalStore:
    """Return the ``data`` attribute of the single column exposed by *obj*.

    Reads ``obj.__legate_data_interface__`` and checks, via ``assert``, that
    the interface reports version 1, that its ``"data"`` mapping holds exactly
    one field, and that neither the field nor its column is nullable.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    interface = obj.__legate_data_interface__
    assert interface["version"] == 1

    columns = interface["data"]
    # Exactly one field is expected in the mapping.
    assert len(columns) == 1
    (only_field,) = columns
    assert not only_field.nullable

    only_column = columns[only_field]
    assert not only_column.nullable
    return only_column.data

@task(
    variants=(VariantCode.CPU,)
)
def fill(arr_store : ReductionStore[ADD]):
    """Add one to every element of the task's view of *arr_store*.

    The store is declared as an ``ADD`` reduction and registered with a
    CPU variant only. The view obtained through ``np.asarray`` is updated
    in place; nothing is returned.
    """
    arr = np.asarray(arr_store)
    # Broadcast a scalar instead of building np.ones(arr.shape): same result,
    # but no temporary array as large as the whole view is allocated.
    arr += 1.0

@task(variants=(VariantCode.CPU,))
def print_arr(arr_store: InputStore):
    """Print the first entry along axis 0 of the input store.

    The store is viewed through ``np.asarray``; the task is registered with
    a CPU variant only and returns nothing.
    """
    view = np.asarray(arr_store)
    first_entry = view[0]
    print(first_entry)

# Driver: run `fill` over several points as an ADD reduction into one array,
# then read that array back from a single-point `print_arr` task.

# Count what the current machine reports for the CPU task target; this value
# is used below as the extent of the `fill` launch shape.
machine = get_machine()
cpus = machine.only(TaskTarget.CPU).count()
print("CPUS:", cpus)

# 400 x 1024 x 1024 array of zeros.
arr = cpy.zeros((400, 1024, 1024))

runtime = get_legate_runtime()
# Manually launch `fill` with launch shape (cpus,), attaching the array's
# store as an ADD reduction.
# NOTE(review): the store is passed unpartitioned, so presumably every point
# reduces into the full array -- confirm against the Legate manual-task docs.
library = fill.library
fill_task = runtime.create_manual_task(library, fill.task_id, (cpus,))
fill_task.add_reduction(get_store(arr), ADD)
fill_task.execute()

# Launch `print_arr` with launch shape (1,), reading the same store as input.
# NOTE(review): presumably this single reader is what forces the "all to one"
# copies named in the issue title -- confirm with the linked profile.
library = print_arr.library
print_arr_task = runtime.create_manual_task(library, print_arr.task_id, (1,))
print_arr_task.add_input(get_store(arr))
print_arr_task.execute()

There is a profile here: https://legion.stanford.edu/prof-viewer/?url=https://sapling2.stanford.edu/~seshu/legion_prof_legate/

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions