Memory Planning

import tvm
import numpy as np
from tvm import relay


def storage_type(mod):
    # The Storage type comes from the Relay prelude ("core.rly") imported below.
    return relay.TypeCall(mod.get_global_type_var("Storage"), [])


mod = tvm.IRModule()
mod.import_from_std("core.rly")
sto = relay.Var("x", storage_type(mod))
sh = relay.const(np.array([1, 2]), dtype="int64")
# Allocate a (1, 2) tensor at byte offset 0 within the given storage.
at = relay.op.memory.alloc_tensor(sto, relay.const(0, dtype="int64"), sh)
mod["main"] = relay.Function([sto], at)
# InferType returns a new module; capture it and print the typed result.
mod = relay.transform.InferType()(mod)
print(mod)
type Storage {
  
}

def @main(%x: Storage[] /* ty=Storage[] */) -> Tensor[(1, 2), float32] {
  memory.alloc_tensor(%x, 0i64 /* ty=int64 */, meta[relay.Constant][0] /* ty=Tensor[(2), int64] */, const_shape=meta[relay.Constant][1], assert_shape=[]) /* ty=Tensor[(1, 2), float32] */
}
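
In the module above the storage region is passed in as a function parameter. A minimal sketch of allocating the storage inside the function as well, assuming the relay.op.memory.alloc_storage signature (size, alignment, device) found in recent TVM releases; check your version:

size = relay.const(8, dtype="int64")    # 1 * 2 float32 elements = 8 bytes
align = relay.const(64, dtype="int64")  # requested byte alignment
sto2 = relay.op.memory.alloc_storage(size, align, tvm.cpu(0))
at2 = relay.op.memory.alloc_tensor(sto2, relay.const(0, dtype="int64"), sh)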

Memory Planning for add

def check_memory_plan(func, check_fn):
    # Build the module.
    mod = tvm.IRModule().from_expr(func)

    # Generate random inputs matching each parameter's shape and dtype.
    args = []
    for param in func.params:
        param = param.type_annotation
        sh = [int(sh) for sh in param.shape]
        data = np.random.rand(*sh).astype(param.dtype)
        args.append(tvm.nd.array(data))

    # TODO(mbs): Why does the executor need to be shared? Seems wrong.
    ex = relay.create_executor("vm", mod)

    # Compute with memory planning (the MemoryPlan pass runs by default).
    plan_result = ex.evaluate()(*args)

    # Compute without memory planning by disabling the MemoryPlan pass.
    with tvm.transform.PassContext(opt_level=1, disabled_pass=["MemoryPlan"]):
        no_plan_result = ex.evaluate()(*args)

    # Compute the reference result in NumPy.
    py_res = check_fn(*[arg.numpy() for arg in args])

    # First check that the two VM results agree.
    np.testing.assert_allclose(no_plan_result.numpy(), plan_result.numpy())

    # Finally check that the results match the NumPy reference.
    np.testing.assert_allclose(plan_result.numpy(), py_res)
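
Beyond checking numerical equivalence, you may want to see the plan's effect directly. A hedged sketch, assuming relay.vm.compile and the Executable.bytecode property available in recent TVM releases: count the alloc_storage instructions in the VM bytecode; with MemoryPlan enabled the count should not exceed the count obtained under a PassContext that disables the pass, since planning coalesces allocations.

def count_alloc_storage(mod, target="llvm"):
    # Compile for the Relay VM and count storage allocations in the
    # bytecode listing.
    vm_exec = relay.vm.compile(mod, target=target)
    return vm_exec.bytecode.count("alloc_storage")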


def check_add(x):
    return x + x


def test_add():
    x = relay.var("x", shape=(2,))
    z = x + x
    func = relay.Function([x], z)
    check_memory_plan(func, check_add)


test_add()


def check_add_sub(x, y):
    z = x + x
    return z - y


def test_add_sub():
    x = relay.var("x", shape=(10,))
    y = relay.var("y", shape=(10,))
    z = x + x
    z = z - y
    func = relay.Function([x, y], z)
    check_memory_plan(func, check_add_sub)


test_add_sub()


def check_no_fuse(x, y, w):
    z = x + y
    return np.matmul(z, np.transpose(w))


def test_no_fuse():
    x = relay.var("x", shape=(5, 1))
    y = relay.var("y", shape=(5, 1))
    w = relay.var("w", shape=(5, 1))
    z = x + y
    out = relay.op.nn.dense(z, w)
    func = relay.Function([x, y, w], out)
    check_memory_plan(func, check_no_fuse)


test_no_fuse()
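
The name no_fuse refers to the element-wise add that feeds dense staying in its own kernel, so an intermediate buffer must be planned between the two. A hedged sketch using the standard FuseOps pass to print the fusion decision (the claim that the producer is not fused into dense is my reading of TVM's fusion rules; verify on your version):

x = relay.var("x", shape=(5, 1))
y = relay.var("y", shape=(5, 1))
w = relay.var("w", shape=(5, 1))
mod = tvm.IRModule.from_expr(relay.Function([x, y, w], relay.op.nn.dense(x + y, w)))
mod = relay.transform.InferType()(mod)
# FuseOps groups operators into kernels; add and dense should land in
# separate groups here.
mod = relay.transform.FuseOps()(mod)
print(mod)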