{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2.5. TensorIR 练习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import IPython\n",
    "import numpy as np\n",
    "import tvm\n",
    "from tvm.ir.module import IRModule\n",
    "from tvm.script import tir as T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 如何编写 TensorIR\n",
    "\n",
    "### 然后编写向量加法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# init data\n",
    "a = np.arange(16).reshape(4, 4)\n",
    "b = np.arange(16, 0, -1).reshape(4, 4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[16, 16, 16, 16],\n",
       "       [16, 16, 16, 16],\n",
       "       [16, 16, 16, 16],\n",
       "       [16, 16, 16, 16]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# numpy version\n",
    "c_np = a + b\n",
    "c_np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[16, 16, 16, 16],\n",
       "       [16, 16, 16, 16],\n",
       "       [16, 16, 16, 16],\n",
       "       [16, 16, 16, 16]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# low-level numpy version\n",
    "def lnumpy_add(a: np.ndarray, b: np.ndarray, c: np.ndarray):\n",
    "  for i in range(4):\n",
    "    for j in range(4):\n",
    "      c[i, j] = a[i, j] + b[i, j]\n",
    "c_lnumpy = np.empty((4, 4), dtype=np.int64)\n",
    "lnumpy_add(a, b, c_lnumpy)\n",
    "c_lnumpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tvm.script.ir_module\n",
    "class MyAdd:\n",
    "    @T.prim_func\n",
    "    def add(A: T.Buffer[(4, 4), \"int64\"],\n",
    "            B: T.Buffer[(4, 4), \"int64\"],\n",
    "            C: T.Buffer[(4, 4), \"int64\"]):\n",
    "        T.func_attr({\"global_symbol\": \"add\"})\n",
    "        for i, j in T.grid(4, 4):\n",
    "            with T.block(\"C\"):\n",
    "                vi, vj = T.axis.remap(\"SS\", [i, j])\n",
    "                C[vi, vj] = A[vi, vj] + B[vi, vj]\n",
    "\n",
    "\n",
    "rt_lib = tvm.build(MyAdd, target=\"llvm\")\n",
    "a_tvm = tvm.nd.array(a)\n",
    "b_tvm = tvm.nd.array(b)\n",
    "c_tvm = tvm.nd.array(np.empty((4, 4), dtype=np.int64))\n",
    "rt_lib[\"add\"](a_tvm, b_tvm, c_tvm)\n",
    "np.testing.assert_allclose(c_tvm.numpy(), c_np, rtol=1e-5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 广播加法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# init data\n",
    "a = np.arange(16).reshape(4, 4)\n",
    "b = np.arange(4, 0, -1).reshape(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 4,  4,  4,  4],\n",
       "       [ 8,  8,  8,  8],\n",
       "       [12, 12, 12, 12],\n",
       "       [16, 16, 16, 16]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# numpy version\n",
    "c_np = a + b\n",
    "c_np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tvm.script.ir_module\n",
    "class MyAdd:\n",
    "    @T.prim_func\n",
    "    def add(A: T.Buffer[(4, 4), \"int64\"],\n",
    "            B: T.Buffer[(4), \"int64\"],\n",
    "            C: T.Buffer[(4, 4), \"int64\"]):\n",
    "        T.func_attr({\"global_symbol\": \"add\"})\n",
    "        for i, j in T.grid(4, 4):\n",
    "            with T.block(\"C\"):\n",
    "                vi, vj = T.axis.remap(\"SS\", [i, j])\n",
    "                C[vi, vj] = A[vi, vj] + B[vj]\n",
    "\n",
    "rt_lib = tvm.build(MyAdd, target=\"llvm\")\n",
    "a_tvm = tvm.nd.array(a)\n",
    "b_tvm = tvm.nd.array(b)\n",
    "c_tvm = tvm.nd.array(np.empty((4, 4), dtype=np.int64))\n",
    "rt_lib[\"add\"](a_tvm, b_tvm, c_tvm)\n",
    "np.testing.assert_allclose(c_tvm.numpy(), c_np, rtol=1e-5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2D 卷积\n",
    "\n",
    "使用 NCHW 布局的卷积的数学定义：\n",
    "\n",
    "$$\n",
    "\\text{Conv}[b, k, i, j] =\n",
    "    \\sum_{d_i, d_j, q} A[b, q, \\text{strides} * i + d_i, \\text{strides} * j + d_j] * W[k, q, d_i, d_j],\n",
    "$$\n",
    "\n",
    "其中，$A$ 是输入张量，$W$ 是权重张量，$b$ 是批次索引，$k$ 是输出通道，$i$ 和 $j$ 是图像高度和宽度的索引，$d_i$ 和 $d_j$ 是权重的索引，$q$ 是输入通道，`strides` 是过滤器窗口的步幅。\n",
    "\n",
    "下面考虑简单的情况：`stride=1, padding=0`。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "N, CI, H, W, CO, K = 1, 1, 8, 8, 2, 3\n",
    "OUT_H, OUT_W = H - K + 1, W - K + 1\n",
    "data = np.arange(N*CI*H*W).reshape(N, CI, H, W)\n",
    "weight = np.arange(CO*CI*K*K).reshape(CO, CI, K, K)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[[ 474,  510,  546,  582,  618,  654],\n",
       "         [ 762,  798,  834,  870,  906,  942],\n",
       "         [1050, 1086, 1122, 1158, 1194, 1230],\n",
       "         [1338, 1374, 1410, 1446, 1482, 1518],\n",
       "         [1626, 1662, 1698, 1734, 1770, 1806],\n",
       "         [1914, 1950, 1986, 2022, 2058, 2094]],\n",
       "\n",
       "        [[1203, 1320, 1437, 1554, 1671, 1788],\n",
       "         [2139, 2256, 2373, 2490, 2607, 2724],\n",
       "         [3075, 3192, 3309, 3426, 3543, 3660],\n",
       "         [4011, 4128, 4245, 4362, 4479, 4596],\n",
       "         [4947, 5064, 5181, 5298, 5415, 5532],\n",
       "         [5883, 6000, 6117, 6234, 6351, 6468]]]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# torch version\n",
    "import torch\n",
    "\n",
    "data_torch = torch.Tensor(data)\n",
    "weight_torch = torch.Tensor(weight)\n",
    "conv_torch = torch.nn.functional.conv2d(data_torch, weight_torch)\n",
    "conv_torch = conv_torch.numpy().astype(np.int64)\n",
    "conv_torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tvm.script.ir_module\n",
    "class MyConv:\n",
    "  @T.prim_func\n",
    "  def conv(A: T.Buffer[(1, 1, 8, 8), \"int64\"], # 1,1,8,8\n",
    "          B: T.Buffer[(2, 1, 3, 3), \"int64\"], # 2,1,3,3\n",
    "          C: T.Buffer[(1, 2, 6, 6), \"int64\"]): # 1,2,6,6\n",
    "    T.func_attr({\"global_symbol\": \"conv\", \"tir.noalias\": True})\n",
    "    for n, c, h, w, i, k1, k2 in T.grid(N, CO, OUT_H, OUT_W, CI, K, K):\n",
    "      with T.block(\"C\"):\n",
    "        vn = T.axis.spatial(1, n)\n",
    "        vc = T.axis.spatial(2, c)\n",
    "        vh = T.axis.spatial(6, h)\n",
    "        vw = T.axis.spatial(6, w)\n",
    "        vi = T.axis.spatial(1, i)\n",
    "        vk1 = T.axis.reduce(3, k1)\n",
    "        vk2 = T.axis.reduce(3, k2)\n",
    "        with T.init():\n",
    "          C[vn, vc, vh, vw] = T.int64(0)\n",
    "        C[vn, vc, vh, vw] = C[vn, vc, vh, vw] + A[vn, vi, vh + vk1, vw + vk2] * B[vc, vi, vk1, vk2]\n",
    "\n",
    "\n",
    "rt_lib = tvm.build(MyConv, target=\"llvm\")\n",
    "data_tvm = tvm.nd.array(data)\n",
    "weight_tvm = tvm.nd.array(weight)\n",
    "conv_tvm = tvm.nd.array(np.empty((N, CO, OUT_H, OUT_W), dtype=np.int64))\n",
    "rt_lib[\"conv\"](data_tvm, weight_tvm, conv_tvm)\n",
    "np.testing.assert_allclose(conv_tvm.numpy(), conv_torch, rtol=1e-5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 如何变换 TensorIR"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.4 ('mlc': conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "aaffa6035209f65dc356111783931130a4c4995d4af64fce84e8309d82e28b87"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}