From 3b8b3ed8cc29116635852e75e0f75b6855a650d9 Mon Sep 17 00:00:00 2001 From: Adam Thompson Date: Fri, 17 Jan 2020 16:16:56 -0500 Subject: [PATCH 1/4] Adding RAPIDS <-> DLFrameworks Jupyter Notebook --- .../gpu_interop_dlframeworks.ipynb | 251 ++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb diff --git a/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb b/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb new file mode 100644 index 00000000..e8c3169c --- /dev/null +++ b/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb @@ -0,0 +1,251 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sharing is Caring: GPU Interoperability and <3 of All Frameworks" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import cupy as cp\n", + "import numpy as np\n", + "from numba import cuda\n", + "\n", + "# PyTorch 1.4 supports direct __cuda_array_interface__ handoff.\n", + "import torch\n", + "\n", + "# RFC: https://github.com/tensorflow/community/pull/180\n", + "# !pip install tfdlpack-gpu\n", + "import tfdlpack" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create GPU Arrays and Move to DL Frameworks with `__cuda_array_interface__`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Frameworks that leverage the `__cuda_array_interface__` can be seamlessly transferred from compatiable libraries (CuPy, Numba, cuSignal, etc) directly, without using an intermediate Tensor format like [DLPack](https://github.com/dmlc/dlpack)\n", + "\n", + "**CuPy <-> PyTorch**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CuPy GPU Array Pointer: (139959965057024, False)\n", + "PyTorch GPU Tensor Pointer: (139959965057024, False)\n", + "CuPy GPU Pointer: (139959965057024, False)\n" + ] + } + ], + "source": [ + "# CuPy - GPU Array (like NumPy!)\n", + "gpu_arr = cp.random.rand(10_000, 10_000)\n", + "\n", + "# Look at pointer\n", + "print('CuPy GPU Array Pointer: ', gpu_arr.__cuda_array_interface__['data'])\n", + "\n", + "# Migrate from CuPy to PyTorch\n", + "torch_arr = torch.as_tensor(gpu_arr, device='cuda')\n", + "\n", + "# Look at pointer -- it's the same as the CuPy array above!\n", + "print('PyTorch GPU Tensor Pointer: ', torch_arr.__cuda_array_interface__['data'])\n", + "\n", + "# Migrate from PyTorch to CuPy\n", + "cupy_arr = cp.asarray(torch_arr)\n", + "\n", + "# Look at pointer\n", + "print('CuPy GPU Pointer: ', cupy_arr.__cuda_array_interface__['data'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Numba CUDA <-> PyTorch**" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Numba GPU Array Pointer: (139958354444288, False)\n", + "PyTorch GPU Tensor Pointer: (139958354444288, False)\n", + "Numba GPU Pointer: (139958354444288, False)\n" + ] + } + ], + "source": [ + "# NumPy - CPU Array\n", + "cpu_arr = np.random.rand(10_000, 10_000)\n", + "\n", + "# Use Numba to move to GPU\n", + "numba_gpu_arr = cuda.to_device(cpu_arr)\n", + "\n", + "# Migrate from Numba, used for custom CUDA JIT kernels to PyTorch\n", + "torch_arr_numba = torch.as_tensor(numba_gpu_arr, device='cuda')\n", + "\n", + "# Migrate from 
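PyTorch back to Numba. (Aside, a minimal zero-copy sketch not in the original cell:\n",
+    "# Numba also offers cuda.as_cuda_array(), which wraps any object exposing\n",
+    "# __cuda_array_interface__ as a device array without copying, e.g.\n",
+    "# numba_view = cuda.as_cuda_array(torch_arr_numba)  -- numba_view is an illustrative name.)\n",
+    "\n",
+    "# Migrate from 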
PyTorch back to Numba\n", + "numba_arr_from_torch = cuda.to_device(torch_arr_numba)\n", + "\n", + "# Pointer love again\n", + "print('Numba GPU Array Pointer: ', numba_gpu_arr.__cuda_array_interface__['data'])\n", + "print('PyTorch GPU Tensor Pointer: ', torch_arr_numba.__cuda_array_interface__['data'])\n", + "print('Numba GPU Pointer: ', numba_arr_from_torch.__cuda_array_interface__['data'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create GPU Arrays and Move to DL Frameworks with DLPack" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Not all major frameworks currently support the `__cuda_array_interface__`, cough, [TensorFlow](https://www.tensorflow.org/). We can use the aforementioned DLPack as a bridge between the GPU ecosystem and TensorFlow with `tfdlpack`. See [this RFC](https://github.com/tensorflow/community/pull/180) for more information.\n", + "\n", + "Allow GPU growth in TensorFlow or TF will take over the entire GPU." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "!export TF_FORCE_GPU_ALLOW_GROWTH=false" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**CuPy -> TensorFlow**" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/job:localhost/replica:0/task:0/device:GPU:0\n" + ] + } + ], + "source": [ + "# CuPy - GPU Array (like NumPy!)\n", + "gpu_arr = cp.random.rand(10_000, 10_000)\n", + "\n", + "# Use CuPy's built in `toDlpack` function to move to a DLPack capsule\n", + "dlpack_arr = gpu_arr.toDlpack()\n", + "\n", + "# Use `tfdlpack` to migrate to TensorFlow\n", + "tf_tensor = tfdlpack.from_dlpack(dlpack_arr)\n", + "\n", + "# Confirm TF tensor is on GPU\n", + "print(tf_tensor.device)\n", + "\n", + "# Use `tfdlpack` to migrate back to CuPy\n", + "# !! Warning, this currently segfaults: https://github.com/VoVAllen/tf-dlpack/issues/12\n", + "# dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", + "# cupy_arr = cp.asarray(dlpack_capsule.fromDlpack())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Numba CUDA -> TensorFlow**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/job:localhost/replica:0/task:0/device:GPU:0\n" + ] + } + ], + "source": [ + "# Reset CUDA memory\n", + "cuda.close()\n", + "\n", + "# NumPy - CPU Array\n", + "cpu_arr = np.random.rand(10_000, 10_000)\n", + "\n", + "# Use Numba to move to GPU\n", + "numba_gpu_arr = cuda.to_device(cpu_arr)\n", + "\n", + "# Use CuPy's asarray function and toDlpack to create DLPack capsule. There are multiple other ways to do this (i.e. PyTorch Utils)\n", + "dlpack_arr = cp.asarray(numba_gpu_arr).toDlpack()\n", + "\n", + "# Migrate from Numba, used for custom CUDA JIT kernels to PyTorch\n", + "tf_tensor = tfdlpack.from_dlpack(dlpack_arr)\n", + "\n", + "# Confirm TF tensor is on GPU\n", + "print(tf_tensor.device)\n", + "\n", + "# Use `tfdlpack` to migrate back to CuPy\n", + "# !! 
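NOTE: once the segfault flagged below is resolved, the reverse path would presumably be\n",
+    "# cp.fromDlpack(dlpack_capsule) -- CuPy consumes the capsule directly -- rather than the\n",
+    "# capsule.fromDlpack() call sketched in the commented-out line, which a DLPack capsule\n",
+    "# does not expose.\n",
+    "# !! 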
Warning, this currently segfaults: https://github.com/VoVAllen/tf-dlpack/issues/12\n", + "# dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", + "# numba_arr = cuda.to_device(dlpack_capsule.fromDlpack())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 4ad1f9d963c566a60828180d7b03ed7b64eff7b7 Mon Sep 17 00:00:00 2001 From: Adam Thompson Date: Fri, 17 Jan 2020 16:35:59 -0500 Subject: [PATCH 2/4] Update README to include interoperability notebook --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5ef26257..f73d0093 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ Below is a listing of the notebooks in this repository. Each row will tell you |cuspatial -> accelerate_geospatial_processing | [accelerate_geospatial_processing](blog_notebooks/cuspatial/trajectory_clustering.ipynb) | This is the notebook for blog [cuSpatial Accelerates Geospatial and Spatiotemporal Processing](https://medium.com/rapids-ai/releasing-cuspatial-to-accelerate-geospatial-and-spatiotemporal-processing-b686d8b32a9) by Milind Naphade, Jianting Zhang, Shuo Wang, Thomson Comer, Josh Paterson, Keith Kraus, Mark Harris, and Sujit Biswas. This notebook showcases cuSpatial benchmarking of directed Hausdorff distance for computing trajectory clustering on a large dataset. | SG | Trajectories Data and target_intersection.png | | randomforest | [fruits_rf_notebook](blog_notebooks/randomforest/fruits_rf_notebook.ipynb) | This is the notebook for blog [GPU-accelerated Random Forest]() by Vishal Mehta, Myrto Papadopoulou, Thejaswi Rao. This notebook showcases how to use GPU accelerated Random Forest Classification in cuML. The fruit dataset used is Self generated and used as an example in the [Blog](https://medium.com/rapids-ai/accelerating-random-forests-up-to-45x-using-cuml-dfb782a31bea) | SG | Self Generated | mortgage_deep_learning | [mortgage_e2e_deep_learning](blog_notebooks/mortgage_deep_learning/mortgage_e2e_deep_learning.ipynb) | **Archive Only.** This end to end notebook for the blog, [Using RAPIDS with PyTorch](https://medium.com/rapids-ai/using-rapids-with-pytorch-e602da018285), by Even Oldridge, combines the RAPIDS GPU data processing with a PyTorch deep learning neural network to predict mortgage loan delinquency. 
| MG | [RAPIDS-IPUMS Mortgage Dataset](https://rapidsai.github.io/demos/datasets/mortgage-data) +| interoperability | [gpu_interop_dlframeworks](blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb) | Demonstration of passing data between GPU enabled libraries and DL frameworks by Adam Thompson | SG | Self Generated | ## Conference Notebooks: From 5071b5859bc6ed83f419e4194477ec7959ea7c73 Mon Sep 17 00:00:00 2001 From: Adam Thompson Date: Sun, 19 Jan 2020 12:02:22 -0500 Subject: [PATCH 3/4] Updating numba and cupy to TF examples with fixed tfdlpack-gpu --- .../gpu_interop_dlframeworks.ipynb | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb b/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb index e8c3169c..97214888 100644 --- a/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb +++ b/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb @@ -50,9 +50,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "CuPy GPU Array Pointer: (139959965057024, False)\n", - "PyTorch GPU Tensor Pointer: (139959965057024, False)\n", - "CuPy GPU Pointer: (139959965057024, False)\n" + "CuPy GPU Array Pointer: (139618414493696, False)\n", + "PyTorch GPU Tensor Pointer: (139618414493696, False)\n", + "CuPy GPU Pointer: (139618414493696, False)\n" ] } ], @@ -92,9 +92,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Numba GPU Array Pointer: (139958354444288, False)\n", - "PyTorch GPU Tensor Pointer: (139958354444288, False)\n", - "Numba GPU Pointer: (139958354444288, False)\n" + "Numba GPU Array Pointer: (139616803880960, False)\n", + "PyTorch GPU Tensor Pointer: (139616803880960, False)\n", + "Numba GPU Pointer: (139616803880960, False)\n" ] } ], @@ -130,7 +130,7 @@ "source": [ "Not all major frameworks currently support the `__cuda_array_interface__`, cough, [TensorFlow](https://www.tensorflow.org/). We can use the aforementioned DLPack as a bridge between the GPU ecosystem and TensorFlow with `tfdlpack`. See [this RFC](https://github.com/tensorflow/community/pull/180) for more information.\n", "\n", - "Allow GPU growth in TensorFlow or TF will take over the entire GPU." + "Optional: Allow GPU growth in TensorFlow or TF will take over the entire GPU." ] }, { @@ -146,7 +146,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**CuPy -> TensorFlow**" + "**CuPy <-> TensorFlow**" ] }, { @@ -158,7 +158,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "/job:localhost/replica:0/task:0/device:GPU:0\n" + "/job:localhost/replica:0/task:0/device:GPU:0\n", + "(139615998574592, False)\n" ] } ], @@ -176,21 +177,20 @@ "print(tf_tensor.device)\n", "\n", "# Use `tfdlpack` to migrate back to CuPy\n", - "# !! Warning, this currently segfaults: https://github.com/VoVAllen/tf-dlpack/issues/12\n", - "# dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", - "# cupy_arr = cp.asarray(dlpack_capsule.fromDlpack())" + "dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", + "cupy_arr = cp.fromDlpack(dlpack_capsule)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Numba CUDA -> TensorFlow**" + "**Numba CUDA <-> TensorFlow**" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -221,9 +221,8 @@ "print(tf_tensor.device)\n", "\n", "# Use `tfdlpack` to migrate back to CuPy\n", - "# !! 
Warning, this currently segfaults: https://github.com/VoVAllen/tf-dlpack/issues/12\n", - "# dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", - "# numba_arr = cuda.to_device(dlpack_capsule.fromDlpack())" + "dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", + "numba_arr = cuda.to_device(cp.fromDlpack(dlpack_capsule))" ] } ], From 1ad9492d1a574568687f922285038fbd196ce796 Mon Sep 17 00:00:00 2001 From: Adam Thompson Date: Sun, 19 Jan 2020 12:09:20 -0500 Subject: [PATCH 4/4] fix typo and add pytorch <-> tensorflow --- .../gpu_interop_dlframeworks.ipynb | 59 ++++++++++++++++--- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb b/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb index 97214888..de6527c6 100644 --- a/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb +++ b/blog_notebooks/interoperability/gpu_interop_dlframeworks.ipynb @@ -50,9 +50,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "CuPy GPU Array Pointer: (139618414493696, False)\n", - "PyTorch GPU Tensor Pointer: (139618414493696, False)\n", - "CuPy GPU Pointer: (139618414493696, False)\n" + "CuPy GPU Array Pointer: (140657360371712, False)\n", + "PyTorch GPU Tensor Pointer: (140657360371712, False)\n", + "CuPy GPU Pointer: (140657360371712, False)\n" ] } ], @@ -92,9 +92,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Numba GPU Array Pointer: (139616803880960, False)\n", - "PyTorch GPU Tensor Pointer: (139616803880960, False)\n", - "Numba GPU Pointer: (139616803880960, False)\n" + "Numba GPU Array Pointer: (140655749758976, False)\n", + "PyTorch GPU Tensor Pointer: (140655749758976, False)\n", + "Numba GPU Pointer: (140655749758976, False)\n" ] } ], @@ -158,8 +158,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/job:localhost/replica:0/task:0/device:GPU:0\n", - "(139615998574592, False)\n" + "/job:localhost/replica:0/task:0/device:GPU:0\n" ] } ], @@ -220,10 +219,52 @@ "# Confirm TF tensor is on GPU\n", "print(tf_tensor.device)\n", "\n", - "# Use `tfdlpack` to migrate back to CuPy\n", + "# Use `tfdlpack` to migrate back to Numba\n", "dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", "numba_arr = cuda.to_device(cp.fromDlpack(dlpack_capsule))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**PyTorch <-> TensorFlow**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/job:localhost/replica:0/task:0/device:GPU:0\n" + ] + } + ], + "source": [ + "import torch\n", + "import tfdlpack\n", + "from torch.utils import dlpack as th_dlpack\n", + "\n", + "# Torch - GPU Array\n", + "gpu_arr = torch.rand(10_000, 10_000).cuda()\n", + "\n", + "# Use Torch's DLPack function to get DLPack Capsule\n", + "dlpack_arr = th_dlpack.to_dlpack(gpu_arr)\n", + "\n", + "# Use `tfdlpack` to migrate to TensorFlow\n", + "tf_tensor = tfdlpack.from_dlpack(dlpack_arr)\n", + "\n", + "# Confirm TF tensor is on GPU\n", + "print(tf_tensor.device)\n", + "\n", + "# Use `tfdlpack` to migrate back to PyTorch\n", + "dlpack_capsule = tfdlpack.to_dlpack(tf_tensor)\n", + "torch_arr = th_dlpack.from_dlpack(dlpack_capsule)" + ] } ], "metadata": {