diff --git a/.github/workflows/linux_ci_script.sh b/.github/workflows/linux_ci_script.sh new file mode 100755 index 0000000..cbafe42 --- /dev/null +++ b/.github/workflows/linux_ci_script.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +docker container stop riallto_ci || true +docker container wait riallto_ci || true + +docker run -dit --rm --name riallto_ci \ + --cap-add=NET_ADMIN \ + -v $(pwd):/workspace \ + --device=/dev/accel/accel0:/dev/accel/accel0 \ + -w /workspace \ + riallto:latest \ + /bin/bash + +docker exec -i riallto_ci /bin/bash -c "source ~/.bashrc && cd /workspace/ && python3 -m pip install . && python3 -m pytest ./tests" + diff --git a/.github/workflows/nix_buildAndRun.yml b/.github/workflows/nix_buildAndRun.yml new file mode 100644 index 0000000..50c8988 --- /dev/null +++ b/.github/workflows/nix_buildAndRun.yml @@ -0,0 +1,44 @@ +name: Linux Riallto CI Testing + +on: + push: + branches: + - main + paths-ignore: + - 'README.md' + - 'CONTRIBUTING.md' + - 'LICENSE**' + - 'notebooks/**' + - 'scripts/utils/**' + - 'scripts/wsl/**' + - '**/*.ipynb' + - '.github/ISSUE_TEMPLATE/**' + - '.github/*.md' + pull_request: + types: [assigned, opened, synchronize, reopened] + paths-ignore: + - 'README.md' + - 'CONTRIBUTING.md' + - 'LICENSE**' + - 'notebooks/**' + - 'scripts/utils/**' + - 'scripts/wsl/**' + - '**/*.ipynb' + - '.github/ISSUE_TEMPLATE/**' + - '.github/*.md' + workflow_dispatch: + +jobs: + birman-tests: + name: not-birman-15-linux-tests + runs-on: self-hosted-linux + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 1 + + - name: run pytests + run: | + ls ./ + ./.github/workflows/linux_ci_script.sh diff --git a/.github/workflows/win_buildAndRun.yml b/.github/workflows/win_buildAndRun.yml index 9270910..0072ec9 100644 --- a/.github/workflows/win_buildAndRun.yml +++ b/.github/workflows/win_buildAndRun.yml @@ -31,7 +31,7 @@ on: jobs: 
birman-tests: name: not-birman-15-tests - runs-on: self-hosted + runs-on: self-hosted-windows steps: - name: Checkout uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index 4f8d1b8..913ee1a 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,10 @@ install/installed install/tarballs/*.tar.gz *-checkpoint.ipynb + +tests/images/*.svg + +/*.json +/*.xclbin +/*.mlir +/*.seq diff --git a/CITATION.cff b/CITATION.cff index 56985e8..578e677 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -30,6 +30,6 @@ authors: - family-names: "Joshua" given-names: "Lu" title: "Riallto: an open-source exploration framework for first time users of the AMD Ryzen AI NPU" -version: 1.0 -date-released: 2021-12-12 -url: "https://github.com/AMDResearch/Riallto" \ No newline at end of file +version: 1.1 +date-released: 2023-12-12 +url: "https://github.com/AMDResearch/Riallto" diff --git a/README.md b/README.md index b8521c3..ab82a29 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ Riallto is an open source project that aims to bridge the gap between newcomers The educational examples and software in this repository will enable you to interact with the NPU and learn how to write custom applications with this novel accelerator. -Note that NPUs can also be referred to as Inference Processing Units, or IPUs. You may see the NPU referred to as an "IPU" in some Ryzen AI documentation. Once the Ryzen AI driver is installed on your computer, you will see a device named "IPU" in the Windows Device Manager." +Note that NPUs can also be referred to as Inference Processing Units, or IPUs. You may see the NPU referred to as an "IPU" in some Ryzen AI documentation. On Windows once the Ryzen AI driver is installed on your computer, you will see a device named "IPU" in the Windows Device Manager." -## Quick Start +## Quick Start (Windows) The Riallto installer is separated into 'Lite' and 'Full' installation versions. 
The 'Lite' version has all the essential packages required for the Riallto and ONNX runtimes on the NPU to get started with prebuilt vision applications. The 'Full' version enables developers to write their own custom applications, which requires additional tools. The steps below will allow you to get quickly started using the 'Lite' version of Riallto. For details on requirements and installation for the 'Full' version, follow the instructions in the [Install Riallto](https://riallto.ai/install-riallto.html) guide. -* [Download and unzip the the v10.1109.8.100 IPU driver](https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=ipu_stack_rel_silicon_1.0.zip). You will need to create an AMD account if you do not have one. +* [Download and unzip the v10.1109.8.128 IPU driver](https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=ipu_stack_rel_silicon_prod_1.1.zip). You will need to create an AMD account if you do not have one. * Open a Windows Command Prompt, or Powershell, as administrator. * Change directory to the unzipped IPU driver directory, then run the `amd_install_kipudrv.bat` file. Press Enter if prompted. * Confirm the IPU driver has installed correctly by running the following command in a Powershell terminal and checking the output matches that of the figure below: @@ -23,7 +23,7 @@ The steps below will allow you to get quickly started using the 'Lite' version o ![IPU driver dialog](docs/images/ipu_driver.png) -* [Download and unzip the Riallto installer](https://www.xilinx.com/bin/public/openDownload?filename=Riallto-v1.0.zip). +* [Download and unzip the Riallto installer](https://www.amd.com/bin/public/amdOpenDownload?filename=Riallto-v1.1.zip). * Navigate to the `installer.exe` using the File Explorer and run as administrator, making sure the 'Lite' install option is selected when prompted, as shown in the figure below.
![Riallto installer options](docs/images/installer.png) @@ -34,6 +34,10 @@ The installer places a shortcut on the desktop, which allows you to launch Riall Note that the notebooks from sections 1, 2, 3, and 5 can be run using the 'Lite' install option. A 'Full' install is required to run the notebooks from section 4. See instructions in the [Install Riallto](https://riallto.ai/install-riallto.html) guide for more details. +## Quick Start (Linux) + +Along with Windows support there is also support to install Riallto in Ubuntu 24.04 provided you upgrade to the 6.10 Linux Kernel. Steps for the installation process for this can be found [here](./scripts/linux). + ## Contribute Contributions to this repository are welcome. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for how to improve Riallto. diff --git a/docs/images/ipu_driver.png b/docs/images/ipu_driver.png index 9b5ffed..8a7f312 100644 Binary files a/docs/images/ipu_driver.png and b/docs/images/ipu_driver.png differ diff --git a/notebooks/1_0_Introduction.ipynb b/notebooks/1_0_Introduction.ipynb index 7ccf074..3e6607b 100644 --- a/notebooks/1_0_Introduction.ipynb +++ b/notebooks/1_0_Introduction.ipynb @@ -74,6 +74,8 @@ "\n", "Demonstrates the Ryzen AI NPU in action, running real-time AI workloads in Microsoft's Windows Studio Effects (WSE).\n", "\n", + "**This is only supported on Windows.**\n", + "\n", "**[1. Windows Studio Effects demo](2_1_MS_Windows_Studio_Effects.ipynb)**\n" ] }, @@ -135,6 +137,8 @@ "\n", "Introduces the PyTorch-ONNX flow for running AI applications on the Ryzen AI NPU.\n", "\n", + "**This is currently not supported on Linux.** \n", + "\n", "**[1. Machine Learning Inference with PyTorch and ONNX](5_1_pytorch_onnx_inference.ipynb)**\n", "\n", "**[2. 
Machine Learning re-training with PyTorch and ONNX](5_2_pytorch_onnx_re-train.ipynb)**\n", @@ -184,7 +188,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.10.6" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/notebooks/2_1_MS_Windows_Studio_Effects.ipynb b/notebooks/2_1_MS_Windows_Studio_Effects.ipynb old mode 100644 new mode 100755 index e649880..c9907c2 --- a/notebooks/2_1_MS_Windows_Studio_Effects.ipynb +++ b/notebooks/2_1_MS_Windows_Studio_Effects.ipynb @@ -13,11 +13,23 @@ "\n", "* Demonstrate the NPU in action using the Windows Studio Effects\n", "\n", + "\n", "## References\n", "\n", "**[Windows Studio Effects](https://support.microsoft.com/en-us/windows/manage-cameras-with-camera-settings-in-windows-11-97997ed5-bb98-47b6-a13d-964106997757#ID0EDBBF)**\n", " \n", - "---\n" + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "This is only supported on Windows and is not supported on the Linux release of Riallto\n", + "\n", + "
" ] }, { @@ -219,7 +231,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/notebooks/4_1_software_framework.ipynb b/notebooks/4_1_software_framework.ipynb old mode 100644 new mode 100755 index 286f590..9da6c08 --- a/notebooks/4_1_software_framework.ipynb +++ b/notebooks/4_1_software_framework.ipynb @@ -52,7 +52,7 @@ "* **AIEtools**\n", " * These are the compilation tools used to build the Ryzen AI NPU application. \n", "\n", - "The AIETools tools are Linux based. For Windows laptops, they run in [Windows Subsystem for Linux 2 (WSL 2)](https://learn.microsoft.com/en-us/windows/wsl/about). If you have installed Riallto and are reading this material on your laptop as a Jupyter notebook, WSL 2 should have been already installed and enabled on your system. " + "The AIETools tools are Linux based. For Windows laptops, they run in [Windows Subsystem for Linux 2 (WSL 2)](https://learn.microsoft.com/en-us/windows/wsl/about). If you have installed Riallto and are reading this material on your Windows laptop as a Jupyter notebook, WSL 2 should have been already installed and enabled on your system. WSL 2 is not required for the Linux installation. On Linux the AIETools tools are contained within a docker container." ] }, { @@ -62,7 +62,7 @@ }, "source": [ "
\n", - "To check that the WSL 2 instance, where the Riallto tools are installed, is enabled on your system, run the following cell:\n", + "On Windows to check that the WSL 2 instance, where the Riallto tools are installed, is enabled on your system, run the following cell (Note: this will not work on a Linux installation of Riallto):\n", "
" ] }, @@ -103,6 +103,32 @@ "The output of this cell should report the OS version where the 'Riallto' tools are installed. If you do not see this, or if you installed the **Lite** version of Riallto, please refer to the [Riallto installation instructions](https://www.riallto.ai/install-riallto.html) to install the **Full** version of Riallto." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "On Ubuntu to check that your Linux Kernel version is >6.10 which Riallto requires, run the following cell:\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6.10.0-061000rc2-generic\n" + ] + } + ], + "source": [ + "! uname -r" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -280,7 +306,7 @@ "\n", "This will construct a passthrough npu.build.Kernel object that can be used within\n", "a callgraph to construct a complete application. \n", - "\u001b[1;31mFile:\u001b[0m c:\\users\\riallto\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\npu\\magic.py" + "\u001b[1;31mFile:\u001b[0m c:\\users\\shane\\appdata\\local\\riallto\\riallto_venv\\lib\\site-packages\\npu\\magic.py" ] }, "metadata": {}, @@ -476,7 +502,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.12.3" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/notebooks/5_1_pytorch_onnx_inference.ipynb b/notebooks/5_1_pytorch_onnx_inference.ipynb index a2d1a13..359e472 100644 --- a/notebooks/5_1_pytorch_onnx_inference.ipynb +++ b/notebooks/5_1_pytorch_onnx_inference.ipynb @@ -14,6 +14,7 @@ "* Show the ONNX model generation and inference flow on the NPU\n", " \n", "* Deploy a quantized ResNet-50 model onto Ryzen AI NPU for inference\n", + "\n", " \n", "## References\n", "\n", @@ -30,6 +31,17 @@ "---\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "This is not currently supported on the Linux release of Riallto.\n", + "\n", + "
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -850,7 +862,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.10.6" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/notebooks/5_2_pytorch_onnx_re-train.ipynb b/notebooks/5_2_pytorch_onnx_re-train.ipynb index 2e6d1cf..cb72927 100644 --- a/notebooks/5_2_pytorch_onnx_re-train.ipynb +++ b/notebooks/5_2_pytorch_onnx_re-train.ipynb @@ -15,6 +15,7 @@ "\n", "* Learn how to quantize an ONNX model to run inference on the NPU\n", "\n", + "\n", "## References\n", "\n", "**[Ryzen AI Software Platform](https://ryzenai.docs.amd.com/en/latest/getstartex.html)**\n", @@ -26,6 +27,17 @@ "---\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "This is not currently supported on the Linux release of Riallto.\n", + "\n", + "
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -698,7 +710,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.10.6" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/npu/__init__.py b/npu/__init__.py index 56a7391..00e7e4a 100644 --- a/npu/__init__.py +++ b/npu/__init__.py @@ -26,13 +26,16 @@ """ from .utils.test_device import get_driver_version, version_to_tuple +import platform __supported_driver__ = "10.1109.8.100" -__installed_driver__ = get_driver_version() -if version_to_tuple(__installed_driver__) < version_to_tuple(__supported_driver__): - raise ValueError(f"""Detected driver: {__installed_driver__}, supported driver version is >={__supported_driver__}, - go to https://riallto.ai/prerequisites-driver.html for driver setup instructions.""") +if platform.system() == 'Windows': + __installed_driver__ = get_driver_version() + + if version_to_tuple(__installed_driver__) < version_to_tuple(__supported_driver__): + raise ValueError(f"""Detected driver: {__installed_driver__}, supported driver version is >={__supported_driver__}, + go to https://riallto.ai/prerequisites-driver.html for driver setup instructions.""") from .repr_dict import ReprDict from .magic import kernel_magic diff --git a/npu/build/appbuilder.py b/npu/build/appbuilder.py index a71ce2b..37bdf3e 100644 --- a/npu/build/appbuilder.py +++ b/npu/build/appbuilder.py @@ -9,6 +9,7 @@ from .appxclbinbuilder import AppXclbinBuilder from .utils import check_wsl_install from typing import Optional +import platform import json class AppBuilder: @@ -41,7 +42,8 @@ class AppBuilder: def __init__(self, name=None) -> None: """Return a new AppBuilder object.""" - check_wsl_install() + if platform.system() == 'Windows': + check_wsl_install() self.name = type(self).__name__ if name is None else name self.ab = AppXclbinBuilder() @@ -67,16 +69,16 @@ def callgraph(self): def to_metadata(self, *args): """ The 
application is converted into the AppMetadata after tracing the callgraph() call.""" self.previous_build_args = args - self.kernels, self.connections = self.fxtracer.to_trace(*args) + self.kernels, self.connections = self.fxtracer.to_trace(*args) return AppMetada(self.name, - self.unique_named(self.kernels), + self.unique_named(self.kernels), self.unique_named(self.connections), self.to_sequence()) def to_handoff(self, *args, file=None): """ Converts the application into a serializable JSON file.""" - self.previous_build_args = args + self.previous_build_args = args with open(file, 'w') as f: json.dump(self.to_json(*args), f, default = lambda o: '') @@ -84,10 +86,10 @@ def to_json(self, *args): """ Converts the application into JSON.""" self.previous_build_args = args return self.to_metadata(*args).to_json() - + @property def metadata(self, *args): - """ Generates the application JSON and displays inside a IPython environment.""" + """ Generates the application JSON and displays inside a IPython environment.""" from npu import ReprDict self.validate_previous_build_args() return ReprDict(self.to_json(*self.previous_build_args), rootname=self.name) @@ -104,7 +106,7 @@ def to_sequence(self): def display(self)->None: """ Generates the application SVG and displays inside a IPython environment.""" - from npu.utils.appviz import AppViz + from npu.utils.appviz import AppViz self.validate_previous_build_args() _viz = AppViz(self.to_json(*self.previous_build_args)) _viz.show @@ -131,15 +133,15 @@ def build(self, *args, debug=False, mlir:Optional[str]=None): self.ab.build(self.name, f"{self.name}.mlir", self.kernels, debug) else: self.ab.build(self.name, mlir, self.kernels, debug) - + def __add__(self, app_component): if isinstance(app_component, Connection): self.merge_applications(app_component.kernels, [app_component]) return self - + if isinstance(app_component, AppBuilder): self.merge_applications(app_component.kernels, app_component.connections) - return self + return 
self raise TypeError(f"{app_component} of type {type(app_component)} is not supported") @@ -150,7 +152,7 @@ def validate_previous_build_args(self): def merge_applications(self, newkernels, newconnections): self.connections.extend(newconnections) - self.kernels.extend(newkernels) + self.kernels.extend(newkernels) def unique_named(self, objs): unique_objs = list(set(objs)) @@ -160,4 +162,4 @@ def unique_named(self, objs): unique_objs_byname_list.sort(key= lambda x : x.name) - return unique_objs_byname_list + return unique_objs_byname_list diff --git a/npu/build/kernelbuilder.py b/npu/build/kernelbuilder.py index df36cbe..a66deed 100644 --- a/npu/build/kernelbuilder.py +++ b/npu/build/kernelbuilder.py @@ -36,7 +36,7 @@ class KernelObjectBuilder(WSLBuilder): def clear_cache(cls): if os.path.exists(cls.prebuilt_path) and glob.glob(os.path.join(cls.prebuilt_path, '*.o')): prebuilt_files = os.path.join(cls.prebuilt_path, "*.o") - WSLBuilder()._wslcall(f"{wsl_prefix()}rm", [f"{wslpath(prebuilt_files)}"]) + WSLBuilder()._wslcall(f"{wsl_prefix()}rm", ["-rf",f"{wslpath(prebuilt_files)}"]) def __init__(self, name, srccode, srcfile) -> None: """Return a new KernelObjectBuilder object.""" @@ -71,10 +71,9 @@ def build(self, debug=False): if self.srcfile is not None or self.getheaders: for extension in ['*.h', '*.hh', '*.hpp', '*.hxx', '*.h++']: - if bool(glob.glob(os.path.join(self.srcpath, extension))): - headerfiles = os.path.join(self.srcpath, extension) + for hfile in glob.glob(os.path.join(self.srcpath, extension)): self._wslcall(f"{wsl_prefix()}cp", - [f"{wslpath(headerfiles)}", + [f"{wslpath(hfile)}", f"{wslpath(self.build_path)}"], debug) self._wslcall(f"{wsl_prefix()}bash", [f"{wslpath(self.build_path)}/kernel_build.sh", f"{self.name}"], debug) diff --git a/npu/build/mlirbuilder.py b/npu/build/mlirbuilder.py index 5f82e4e..c4a3dc4 100644 --- a/npu/build/mlirbuilder.py +++ b/npu/build/mlirbuilder.py @@ -43,7 +43,7 @@ def __init__(self, metadata, config=(4,1,1)): 
MLIRConnect.reset_id() self.aietiles, self.memtiles, self.sdmatiles = self._parse_tiles(config) - self.tiles = self.sdmatiles | self.memtiles | self.aietiles + self.tiles = {**self.sdmatiles, **self.memtiles, **self.aietiles} self._map_kernels_to_tiles() self._cons_src2dst = self._populate_src2dst_cons_dict() diff --git a/npu/build/mlirsequencebuilder.py b/npu/build/mlirsequencebuilder.py index c9f295e..ddf873a 100644 --- a/npu/build/mlirsequencebuilder.py +++ b/npu/build/mlirsequencebuilder.py @@ -202,7 +202,10 @@ def _generate_memcpy_nd_from_transfer(self, t:List[List[int]])->str: def _change_to_int32_offset(self, offset, dtype): - itemsize = int(str(dtype)[1:])//8 + if dtype == "bf16": + itemsize = 2 + else: + itemsize = int(str(dtype)[1:])//8 if offset % 4 != 0: raise ValueError(f"Must be divisible by 4 {offset=}") @@ -217,9 +220,16 @@ def _change_to_int32_offset(self, offset, dtype): def _change_to_int32(self, shape, dtype): mod_shape = np.zeros(shape).squeeze() - new_shape = list(mod_shape.shape[:]) + if shape == (1,): + new_shape = [1] + else: + new_shape = list(mod_shape.shape[:]) + + if dtype == "bf16": + itemsize = 2 + else: + itemsize = int(str(dtype)[1:])//8 - itemsize = int(str(dtype)[1:])//8 if not new_shape and itemsize == 4: return (1,) @@ -292,7 +302,7 @@ def _populate_constants_table(self)->None: self._constants_table = {} self._add_constant(0) self._add_constant(1) - ubs = self._ingress_ub | self._egress_ub + ubs = {**self._ingress_ub, **self._egress_ub} for ub in ubs.values(): self._add_constant(max(ub.tilesizes)) self._add_constant(len(ub.dim)) @@ -330,6 +340,7 @@ def _filter_ub(self)->None: def _get_ub_dtype_mlir_str(self, ub)->str: typemap = { + "bfloat16" : "bf16", "uint8" : "i8", "uint16" : "i16", "uint32" : "i32", @@ -370,6 +381,7 @@ def _populate_ingress_egress(self)->None: if s["snkkernelname"] in self._userbuffers: if s["snkkernelname"] not in self._egress_ub: + c_ub = self._userbuffers[s['snkkernelname']] 
self._egress_ub[s["snkkernelname"]] = UBDataMovement( ubname=s["snkkernelname"], symname=s["name"], @@ -383,8 +395,7 @@ def _populate_ingress_egress(self)->None: self._egress_ub[s["snkkernelname"]].dim.append([0]) else: self._egress_ub[s["snkkernelname"]].dim.append(s["snkslices"]) - self._ingress_egress_ub = self._ingress_ub | self._egress_ub - + self._ingress_egress_ub = {**self._ingress_ub, **self._egress_ub} def _check(self)->None: for _,ub in self._ingress_ub.items(): diff --git a/npu/build/utils.py b/npu/build/utils.py index 0297fcd..b0ca6cb 100644 --- a/npu/build/utils.py +++ b/npu/build/utils.py @@ -1,16 +1,15 @@ # Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. # SPDX-License-Identifier: MIT -import os -import re +import platform import subprocess def is_win()->bool: - """ Returns true if we are running this on windows.""" - return os.name == "nt" + """ Returns true if we are running this on Windows.""" + return platform.system() == 'Windows' def is_win_path(path:str)->bool: - """ Returns true if the path above is a windows path """ + """ Returns true if the path above is a Windows path """ newpath = path.split('\\') return newpath[0].endswith(':') diff --git a/npu/lib/applications/binaries/color_detect_1080p.xclbin b/npu/lib/applications/binaries/color_detect_1080p.xclbin index 172a3e6..039c1d0 100644 Binary files a/npu/lib/applications/binaries/color_detect_1080p.xclbin and b/npu/lib/applications/binaries/color_detect_1080p.xclbin differ diff --git a/npu/lib/applications/binaries/color_detect_720p.xclbin b/npu/lib/applications/binaries/color_detect_720p.xclbin index ef339cf..5e9d296 100644 Binary files a/npu/lib/applications/binaries/color_detect_720p.xclbin and b/npu/lib/applications/binaries/color_detect_720p.xclbin differ diff --git a/npu/lib/applications/binaries/color_threshold_v1_1080p.xclbin b/npu/lib/applications/binaries/color_threshold_v1_1080p.xclbin index eb10384..6b1ffb7 100644 Binary files 
a/npu/lib/applications/binaries/color_threshold_v1_1080p.xclbin and b/npu/lib/applications/binaries/color_threshold_v1_1080p.xclbin differ diff --git a/npu/lib/applications/binaries/color_threshold_v1_720p.xclbin b/npu/lib/applications/binaries/color_threshold_v1_720p.xclbin index 3c4aa4e..670bd8e 100644 Binary files a/npu/lib/applications/binaries/color_threshold_v1_720p.xclbin and b/npu/lib/applications/binaries/color_threshold_v1_720p.xclbin differ diff --git a/npu/lib/applications/binaries/color_threshold_v2_1080p.xclbin b/npu/lib/applications/binaries/color_threshold_v2_1080p.xclbin index 358169b..a6b5578 100644 Binary files a/npu/lib/applications/binaries/color_threshold_v2_1080p.xclbin and b/npu/lib/applications/binaries/color_threshold_v2_1080p.xclbin differ diff --git a/npu/lib/applications/binaries/color_threshold_v2_720p.xclbin b/npu/lib/applications/binaries/color_threshold_v2_720p.xclbin index a87bec8..f2ff905 100644 Binary files a/npu/lib/applications/binaries/color_threshold_v2_720p.xclbin and b/npu/lib/applications/binaries/color_threshold_v2_720p.xclbin differ diff --git a/npu/lib/applications/binaries/denoise_data_parallel_1080p.xclbin b/npu/lib/applications/binaries/denoise_data_parallel_1080p.xclbin index fcffd7c..440f370 100644 Binary files a/npu/lib/applications/binaries/denoise_data_parallel_1080p.xclbin and b/npu/lib/applications/binaries/denoise_data_parallel_1080p.xclbin differ diff --git a/npu/lib/applications/binaries/denoise_data_parallel_720p.xclbin b/npu/lib/applications/binaries/denoise_data_parallel_720p.xclbin index 65391f2..3144a35 100644 Binary files a/npu/lib/applications/binaries/denoise_data_parallel_720p.xclbin and b/npu/lib/applications/binaries/denoise_data_parallel_720p.xclbin differ diff --git a/npu/lib/applications/binaries/denoise_task_parallel_1080p.xclbin b/npu/lib/applications/binaries/denoise_task_parallel_1080p.xclbin index dd0bdcd..937a4c4 100644 Binary files 
a/npu/lib/applications/binaries/denoise_task_parallel_1080p.xclbin and b/npu/lib/applications/binaries/denoise_task_parallel_1080p.xclbin differ diff --git a/npu/lib/applications/binaries/denoise_task_parallel_720p.xclbin b/npu/lib/applications/binaries/denoise_task_parallel_720p.xclbin index 23fcf04..f9d6d63 100644 Binary files a/npu/lib/applications/binaries/denoise_task_parallel_720p.xclbin and b/npu/lib/applications/binaries/denoise_task_parallel_720p.xclbin differ diff --git a/npu/lib/applications/binaries/edge_detect_1080p.xclbin b/npu/lib/applications/binaries/edge_detect_1080p.xclbin index 7c20fa8..f3f3763 100644 Binary files a/npu/lib/applications/binaries/edge_detect_1080p.xclbin and b/npu/lib/applications/binaries/edge_detect_1080p.xclbin differ diff --git a/npu/lib/applications/binaries/edge_detect_720p.xclbin b/npu/lib/applications/binaries/edge_detect_720p.xclbin index 881a9b9..4954936 100644 Binary files a/npu/lib/applications/binaries/edge_detect_720p.xclbin and b/npu/lib/applications/binaries/edge_detect_720p.xclbin differ diff --git a/npu/lib/applications/videoapps.py b/npu/lib/applications/videoapps.py index ad6e2b5..ccf14f7 100644 --- a/npu/lib/applications/videoapps.py +++ b/npu/lib/applications/videoapps.py @@ -8,6 +8,8 @@ import inspect import numpy as np import colorsys +import time +import platform import matplotlib.pyplot as plt import ipywidgets as widget from IPython.display import display @@ -42,7 +44,7 @@ def _get_full_path(xclbin: str = None) -> str: def _find_closest_resolution(cam_h: int = None, - cam_w: int = None) -> tuple[int, int]: + cam_w: int = None): """Find the closes available resolution Find the nearest resolution in each dimension and use it if it matches @@ -65,7 +67,7 @@ def _find_closest_resolution(cam_h: int = None, return resolution -def _get_webcam_resolution(cap) -> tuple[int, int]: +def _get_webcam_resolution(cap): """Get webcam resolution""" cam_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) cam_h = 
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) @@ -158,7 +160,9 @@ def __init__(self, filename, videosource: Union[int, str] = 0, def _get_resolution(self, videosource): if isinstance(videosource, int): - if videosource == 0: + if platform.system() == 'Linux': + prop = cv2.CAP_V4L2 + elif videosource == 0: prop = cv2.CAP_MSMF else: prop = cv2.CAP_DSHOW @@ -178,19 +182,9 @@ def _get_resolution(self, videosource): self.cam_h, self.cam_w = \ _find_closest_resolution(self.cam_h, self.cam_w) - def _get_resolution_file(self, filename): - self._cap = cv2.VideoCapture(filename) - - self._resize = not _set_supported_webcam_resolution(self._cap) - self.cam_h, self.cam_w = _get_webcam_resolution(self._cap) - - self._camres = self.cam_w, self.cam_h - if self._resize: - self.cam_h, self.cam_w = \ - _find_closest_resolution(self.cam_h, self.cam_w) - def start(self): """Start the video processing""" + time.sleep(0.3) ret, _ = self._cap.read() if not self._cap.isOpened() or not ret: self._cap.release() diff --git a/npu/lib/kernels/addweighted.py b/npu/lib/kernels/addweighted.py index 1afd577..dd4e766 100644 --- a/npu/lib/kernels/addweighted.py +++ b/npu/lib/kernels/addweighted.py @@ -18,7 +18,7 @@ class AddWeighted(): """ def __new__(cls, *args): - cpp = str(Path(__file__).parent / "cpp" / "addweighted.cpp") + cpp = str(Path(__file__).parent / "cpp" / "addWeighted.cpp") return KernelObjCall(cpp, cls.behavioralfx, *args) def behavioralfx(self): @@ -40,5 +40,5 @@ class AddWeightedScalar(): """Scalar implementation of the `cv2.AddWeighted` function""" def __new__(cls, *args): - cpp = str(Path(__file__).parent / "cpp" / "addweighted_scalar.cpp") + cpp = str(Path(__file__).parent / "cpp" / "addWeighted_scalar.cpp") return KernelObjCall(cpp, AddWeighted.behavioralfx, *args) diff --git a/npu/runtime/__init__.py b/npu/runtime/__init__.py index 5718319..f5ef4df 100644 --- a/npu/runtime/__init__.py +++ b/npu/runtime/__init__.py @@ -44,8 +44,9 @@ """ +import platform import os -if os.name == 'nt': 
+if platform.system() == 'Windows': os.add_dll_directory(os.path.join('C:\\', 'Windows', 'System32', 'AMD')) from .pyxrt import device, xclbin diff --git a/npu/runtime/apprunner.py b/npu/runtime/apprunner.py index e7fba71..4f540c2 100644 --- a/npu/runtime/apprunner.py +++ b/npu/runtime/apprunner.py @@ -21,6 +21,9 @@ from npu.utils.xbutil import XBUtil import ipywidgets as widget from IPython.display import display +import subprocess +import platform +from warnings import warn dtype_to_maxval = { "uint32_t" : 4294967296, @@ -48,7 +51,6 @@ def _updatebar(huebar: widget.Image , min:int, max: int): class IPUAppAlreadyLoaded(Exception): pass - class AppRunner: """This class abstracts the necessary setup steps of an NPU application and enables a simple interface with the accelerator @@ -60,26 +62,29 @@ class AppRunner: xclbin_name : str Name of xclbin file fw_sequence : str - Name of the firmware sequence, typically same name as the + Name of the firmware sequence, typically same name as the xclbin file handoff : str - Name of the metadata handoff file, typically a .json file + Name of the metadata handoff file, typically a .json file with the same name as the xclbin and firmware files Note ---- - This class is primarily built on top of the python bindings to + This class is primarily built on top of the python bindings to XRT (Xilinx Runtime Library). You can read more about the runtime in the documentation at https://xilinx.github.io/XRT/. 
- + """ def __init__(self, xclbin_name:str, fw_sequence:Optional[str]=None, handoff:Optional[str]=None): """Returns a new AppRunner object.""" self._process_handoff_metadata(xclbin_name, handoff) - self.xbutil = XBUtil() - self._stability_checks() + + # Extra stability checks for windows + if platform.system() == "Windows": + self.xbutil = XBUtil() + self._stability_checks() # If sequence given, use it, otherwise look for same name as xclbin if fw_sequence: @@ -88,6 +93,9 @@ def __init__(self, xclbin_name:str, fw_sequence:Optional[str]=None, handoff:Opti self.sequence = ipr.Sequence(os.path.splitext(xclbin_name)[0] + '.seq', first_parse=True) xclbin = ipr.xclbin(xclbin_name) + + # Run the script to allow this unsigned firmware xclbin to run + self.kernel_params = self._get_kernel_info(xclbin_name) self.device = self._get_device() @@ -95,7 +103,7 @@ def __init__(self, xclbin_name:str, fw_sequence:Optional[str]=None, handoff:Opti self.device.register_xclbin(xclbin) except RuntimeError as e: print(str(e)) - print("""Failed to register xclbin. Is another application running? + print("""Failed to register xclbin. Is another application running? Try shutting down/restarting all other jupyter notebooks and try again.""") raise @@ -111,17 +119,22 @@ def __init__(self, xclbin_name:str, fw_sequence:Optional[str]=None, handoff:Opti self._allocated_arrays = [] def _get_device(self): - """ Checks to see if there is already an AppRunner that exists. + """ Checks to see if there is already an AppRunner that exists. if there is gets the device from previously allocated AppRunner or else creates a new device. 
""" + for obj in gc.get_objects(): - if isinstance(obj, type(self)) and (obj != self): - if getattr(obj, "device", None): - return obj.device + try: + if isinstance(obj, type(self)) and (obj != self): + if getattr(obj, "device", None): + return obj.device + except Exception as e: + warn(f"Encountered an exception during isinstance check: {e}") + continue return ipr.device(0) - def _process_handoff_metadata(self, xclbin_name:str, handoff:Optional[str]=None)->None: + def _process_handoff_metadata(self, xclbin_name:str, handoff:Optional[str]=None)->None: """ Parses the handoff metadata if it exists and uses that to set the kernel name. If no metadata exists then parse the kernel name from the xclbin. @@ -146,7 +159,7 @@ def metadata(self): def _stability_checks(self)->None: """ Checks to ensure that the NPU is in a sensible state before - trying to load the application + trying to load the application """ if self.xbutil.app_count >= 4: raise RuntimeError("There is currently no free space on the NPU " @@ -184,7 +197,7 @@ def _apply_metadata(self): "dtype" : port["c_dtype"], "init_val" : port["value"] } - self.rtps[k["name"]][port["name"]] = pdict + self.rtps[k["name"]][port["name"]] = pdict setattr(d, port["name"], self.sequence.mlir_rtps[d._tloc][idx]) idx = idx + 1 @@ -513,7 +526,9 @@ def call(self, *kwargs): run = self.kernel(self.instr, len(self.sequence.buffer), *run_args) ert_state = run.wait(5000) # 5 second timeout - if ert_state.value != 4: + + # Currently this check is only working with the windows bindings + if (ert_state.value != 4) and (platform.system() == "Windows"): raise RuntimeError(f"Returned state is {ert_state}: {ert_state.value}, expected ") def __delete__(self, instance): @@ -548,7 +563,7 @@ def Signature(self): return self.kernel_params['signature'] class PynqBuffer(np.ndarray): - """This is a subclass of numpy.ndarray. This class is + """This is a subclass of numpy.ndarray. 
This class is intended to be constructed using the AppRunner.allocate() method and should not be used as a standalone. @@ -559,12 +574,12 @@ class PynqBuffer(np.ndarray): cacheable: bool Typically host buffers will not be cacheable, but instr buffers will always be. - + Note ---- - It's important to free the buffer memory after use -- this + It's important to free the buffer memory after use -- this can be done with the `free_memory()` method. The AppRunner - class tracks the allocated buffers and clears the buffers + class tracks the allocated buffers and clears the buffers automatically when the object has been deleted. """ @@ -612,4 +627,4 @@ def __repr__(self): def __str__(self): if self.bo is None: return "" - return super().__str__() \ No newline at end of file + return super().__str__() diff --git a/npu/runtime/pyxrt.so b/npu/runtime/pyxrt.so new file mode 100755 index 0000000..e1618b6 Binary files /dev/null and b/npu/runtime/pyxrt.so differ diff --git a/npu/utils/appviz.py b/npu/utils/appviz.py index 9c702f2..6281bf6 100644 --- a/npu/utils/appviz.py +++ b/npu/utils/appviz.py @@ -144,11 +144,19 @@ def _draw_connection(self, c, dbuf: bool = False) -> None: if src['type'] == 'CT' and dst['type'] == 'CT': src_row = self._drawn_kernels[src['name']]['row'] + dst_row = self._drawn_kernels[dst['name']]['row'] for i in range(2): self._col_svg.aie_tiles[src_row].add_buffer( self._drawn_kernels[src['name']]['kcolor'], self._kanimate_duration/2, start_empty=not bool(i)) + # if CTs are non neighbors we need to add double buffer in dst + if not self._are_neighbors(src, dst): + self._col_svg.aie_tiles[dst_row].add_buffer( + self._drawn_kernels[src['name']]['kcolor'], + self._kanimate_duration/2, + start_empty= bool(i)) + self._draw_ct2ct_data_movement(src, dst) if src['type'] == 'IT' and dst['type'] == 'CT': @@ -205,7 +213,8 @@ def _draw_connection(self, c, dbuf: bool = False) -> None: dst_buf_color = self._dbuf_colors[c['name']] show_mem_buffer = True - if 'in_buffer' 
not in c['name']: + mtmode = src.get('mtmode') + if mtmode == 'passthrough': if self._mt2ct_passthrough['found']: dst_buf_color = self._mt2ct_passthrough['color'] show_mem_buffer = False @@ -226,7 +235,7 @@ def _draw_connection(self, c, dbuf: bool = False) -> None: self._kanimate_duration/2, start_empty=dbuf) if not dbuf: - self._draw_mem2ct_ic(dst, c, dst_buf_color) + self._draw_mem2ct_ic(dst, c, dst_buf_color, mtmode) else: self._mt2ct_counter += 1 @@ -256,11 +265,11 @@ def _draw_ct2mem_ic(self, src) -> None: color=src_color) self._ct2mt_counter += 1 - def _draw_mem2ct_ic(self, dst, c, dst_color) -> None: + def _draw_mem2ct_ic(self, dst, c, dst_color, mtmode=None) -> None: """Display animation originating from MT and destination CT""" dst_row = self._loc_conv[dst['tloc'][1]] - delay = (self._mt2ct_counter-1) / 5 + delay = self._mt2ct_counter / 5 self._col_svg.mem_tiles[0].add_ic_animation( diagonal_from_tile=1, @@ -276,8 +285,7 @@ def _draw_mem2ct_ic(self, dst, c, dst_color) -> None: duration=self._kanimate_duration/2, delay=delay, color=dst_color) - if 'in_buffer' in c['name']: - self._mt2ct_counter += 1 + self._mt2ct_counter += int(mtmode == 'split') def _draw_ub2mem_ic(self, src, dst) -> None: """Display animation originating from IT and destination MT""" diff --git a/npu/utils/nputop.py b/npu/utils/nputop.py index 0305563..2d9fbaf 100644 --- a/npu/utils/nputop.py +++ b/npu/utils/nputop.py @@ -2,9 +2,12 @@ # SPDX-License-Identifier: MIT from .xbutil import XBUtil - +import platform def nputop(): """ Uses XBUtil to display all currently running applications in an ipywidgets form suitable for JupyterLab """ - XBUtil().apps() + if platform.system() == "Windows": + XBUtil().apps() + else: + print(f"nputop is not currently supported in linux due to changes in the xbutil api") diff --git a/scripts/linux/Dockerfile b/scripts/linux/Dockerfile new file mode 100644 index 0000000..d93dd2a --- /dev/null +++ b/scripts/linux/Dockerfile @@ -0,0 +1,94 @@ +# Copyright (C) 2023 
Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +FROM ubuntu:24.04 as base + +ARG BUILD_TEMPDIR +ARG LIC_MAC=00:00:00:00:00:00 +ENV LIC_MAC=${LIC_MAC} + +ARG USER_ID +ARG GROUP_ID +ARG GROUP_NAME + +SHELL ["/bin/bash", "-c"] + +RUN apt-get update +RUN apt-get install -y sudo adduser perl + +RUN deluser --remove-home $(getent passwd | awk -F: -v uid="$USER_ID" '$3 == uid {print $1}') || echo "Ignoring error, as uid:${USER_ID} does not exist" +RUN groupadd -g ${GROUP_ID} ${GROUP_NAME} || echo "Ignoring error, as ${GROUP_NAME} already exists" +RUN useradd -u $USER_ID -g $GROUP_ID -ms /bin/bash riallto +RUN adduser riallto sudo +RUN echo 'riallto ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/riallto +USER riallto +ENV HOME /home/riallto +WORKDIR /home/riallto + +RUN sudo chmod a+rwx /root + +ARG DEBIAN_FRONTEND=noninteractive + +# Generate locales +RUN sudo apt-get update +RUN sudo apt-get install -y git vim wget locales +RUN sudo locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC sudo -E apt-get install -y tzdata + +RUN git clone https://github.com/amd/xdna-driver.git --recursive --depth=1 +RUN sudo chmod +x /home/riallto/xdna-driver/xrt/src/runtime_src/tools/scripts/xrtdeps.sh +RUN sudo /home/riallto/xdna-driver/xrt/src/runtime_src/tools/scripts/xrtdeps.sh -docker + +COPY $BUILD_TEMPDIR/root/debs npu_ubuntu_debs + +RUN sudo apt-get install -y dkms udev python3 +RUN sudo apt-get install -y python3-pip ocl-icd-opencl-dev uuid-dev + +RUN sudo -E dpkg -i npu_ubuntu_debs/xrt_*-amd64-xrt.deb +RUN sudo -E dpkg -i npu_ubuntu_debs/xrt_plugin*-amdxdna.deb + +RUN git clone https://github.com/AMDResearch/Riallto -b v1.1 --depth=1 + +SHELL ["/bin/bash", "-l", "-c"] + +# Setup the bashrc environment +RUN echo "source /opt/xilinx/xrt/setup.sh" >> ~/.bashrc +RUN sed -i '6,9d' ~/.bashrc # remove interactive only mode in bashrc +RUN echo "export 
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/xilinx/xrt/lib" + +# Setup Riallto +RUN sudo apt-get install -y python3-venv +RUN python3 -m venv ~/riallto_venv +RUN echo "source ~/riallto_venv/bin/activate" >> ~/.bashrc +RUN source ~/.bashrc && cd ~/Riallto/scripts/wsl/ && chmod +x ubuntu_deps.sh && sudo ./ubuntu_deps.sh +RUN source ~/.bashrc && cd ~/Riallto/scripts/wsl/ && python3 -m pip install -r requirements.txt +RUN python3 -m pip install ~/Riallto + +# Install MLIR-AIE and Xilinx tools +COPY $BUILD_TEMPDIR/*.tar.gz /home/riallto/ +RUN cd ~/Riallto/scripts/wsl/ && chmod +x setup_env.sh && sudo -E ./setup_env.sh /home/riallto/xilinx_tools.tar.gz /home/riallto/pynqMLIR-AIE.tar.gz && sudo chmod -R a+rwx /opt/ + +# Patch the MLIR-AIE xclbin metadata generation +RUN sudo sed -i '340 i\ "subtype":"DPU",' /opt/mlir-aie/python/aie/compiler/aiecc/main.py + +# Setup license +COPY $BUILD_TEMPDIR/Xilinx.lic /opt/ +RUN sudo apt-get install -y iproute2 + +# For external license +RUN sudo -E echo 'export XILINXD_LICENSE_FILE=/opt/Xilinx.lic' >> /opt/mlir_settings.sh +RUN sudo -E echo 'sudo ip link add vmnic0 type dummy || true' >> /opt/mlir_settings.sh +RUN sudo -E echo 'sudo ip link set vmnic0 addr ${LIC_MAC} || true' >> /opt/mlir_settings.sh + +# Install Jupyterhub/notebooks +RUN source ~/.bashrc && python3 -m pip install jupyterhub +RUN source ~/.bashrc && python3 -m pip install notebook + +#Cleanup a bit +RUN rm -rf /home/riallto/*.tar.gz + +CMD ["/bin/bash", "-l", "-c", "jupyterhub-singleuser", "--allow-root"] + diff --git a/scripts/linux/README.md b/scripts/linux/README.md new file mode 100644 index 0000000..b6d6ff7 --- /dev/null +++ b/scripts/linux/README.md @@ -0,0 +1,38 @@ +# Riallto Ubuntu 24.04 setup + +Currently there is support for bringing up Riallto on Ubuntu 24.04 with docker. +To use Riallto on Linux requires the use of the [xdna-driver](https://github.com/amd/xdna-driver) which is installed as part of the setup. 
+This driver requires version 6.10+ of the linux kernel, these scripts will upgrade a standard Ubuntu 24.04 installation to this kernel version. __Using this kernel version will require disabling secure boot on your device.__ + +## Install steps + +On an NPU enabled laptop running Ubuntu 24.04. + +1. __Setup Docker.__ +You can follow the steps [here](https://docs.docker.com/engine/install/ubuntu/). + +2. __Add your user to the docker user group and then relogin.__ +``` +sudo usermod -aG docker $USER ; exit +``` + +3. __Obtain a license file for Riallto.__ +Please follow the [guide here](https://riallto.ai/prerequisites-aie-license.html#prerequisites-aie-license) + +4. __Disable secure boot from your BIOS settings.__ For now we are using an unsigned kernel version requiring that secure boot is disabled before it can be used. To disable secure boot there is a [guide](https://learn.microsoft.com/en-us/windows-hardware/manufacture/desktop/disabling-secure-boot?view=windows-11) from Microsoft here, but often the steps depend on your hardware manufacturer. + +5. __Run `./setup_riallto_linux.sh `.__ +This command will check the kernel version and if the xdna-driver has been installed. If the kernel is not 6.10 or the NPU device drivers are missing it will build them within a docker and install them on the host machine. This takes about 10 minutes to run and after completing successfully the user will be asked to restart. + +6. __Reboot the machine.__ +To finish upgrading the kernel to `6.10`. + +7. __Re run the `./setup_riallto_linux.sh ` script.__ +This will build the Riallto docker and will take about 20 minutes. + +## Running Riallto / Running Tests +Inside this directory there are a few scripts. + +* `launch_jupyter.sh ` - will launch a jupyterlab server from a docker container allowing you to use Riallto notebooks in the directory provided (usually Riallto/notebooks). 
+* `run_pytest.sh` - will run a suite of pytests to test the operation of your NPU device and the Riallto installation. + diff --git a/scripts/linux/create_container.sh b/scripts/linux/create_container.sh new file mode 100755 index 0000000..9443899 --- /dev/null +++ b/scripts/linux/create_container.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +docker run -dit --rm --name riallto_docker \ + --cap-add=NET_ADMIN \ + -v $(pwd):/workspace \ + --device=/dev/accel/accel0:/dev/accel/accel0 \ + -w /workspace \ + riallto:latest \ + /bin/bash + +docker exec -it riallto_docker /bin/bash diff --git a/scripts/linux/launch_jupyter.sh b/scripts/linux/launch_jupyter.sh new file mode 100755 index 0000000..6f43fec --- /dev/null +++ b/scripts/linux/launch_jupyter.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +# Check input args +if [ -z "$1" ]; then + echo "Error: No notebooks directory supplied (usually Riallto/notebooks)" + echo "Usage: $0 " + exit 1 +fi + +DIR="$1" +ABS_DIR=$(cd "$DIR" && pwd) + +# check to make sure the directory exists +if [ ! -d "$ABS_DIR" ]; then + echo "Error: $ABS_DIR is not a valid directory or does not exist." 
+ exit +fi + +docker container stop riallto_jupyter > /dev/null 2>&1 || true +docker container wait riallto_jupyter > /dev/null 2>&1 || true + +webcams=$(ls /dev/video*) +cmd="docker run -dit --rm --name riallto_jupyter" +cmd+=" --cap-add=NET_ADMIN --device=/dev/accel/accel0:/dev/accel/accel0" +cmd+=" -p 8888:8888 " +cmd+=" -v $ABS_DIR:/notebooks " +for cam in $webcams; do + cmd+=" --device=$cam:$cam" +done +cmd+=" -w /notebooks riallto:latest /bin/bash" + +echo " running $cmd" +eval $cmd + + +docker exec -it riallto_jupyter /bin/bash -c " (sudo chmod 666 /dev/video* || true) && source ~/.bashrc && cd /notebooks && python3 -m jupyterlab --ip=0.0.0.0 --port=8888 --no-browser --NotebookApp.token=''" + +docker container stop riallto_jupyter > /dev/null 2>&1 || true diff --git a/scripts/linux/run_pytest.sh b/scripts/linux/run_pytest.sh new file mode 100755 index 0000000..5beff02 --- /dev/null +++ b/scripts/linux/run_pytest.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +docker container stop riallto_pytest || true +docker container wait riallto_pytest || true + +docker run -dit --rm --name riallto_pytest \ + --cap-add=NET_ADMIN \ + -v $(pwd):/workspace \ + --device=/dev/accel/accel0:/dev/accel/accel0 \ + -w /workspace \ + riallto:latest \ + /bin/bash + +docker exec -it riallto_pytest /bin/bash -c "source ~/.bashrc && cd /home/riallto/Riallto && python3 -m pytest ./tests" + +docker container stop riallto_pytest || true diff --git a/scripts/linux/setup_riallto_linux.sh b/scripts/linux/setup_riallto_linux.sh new file mode 100755 index 0000000..8ec2557 --- /dev/null +++ b/scripts/linux/setup_riallto_linux.sh @@ -0,0 +1,220 @@ +#!/bin/bash + +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# SPDX-License-Identifier: MIT + +set -e + +DRIVER_TARBALL=ubuntu24.04_npu_drivers.tar.gz +REQUIRED_KERNEL_VERSION="6.10.0-061000rc2-generic" +NPU_FIRMWARE="/lib/firmware/amdnpu/1502_00/npu.sbin" +KERNEL_HEADERS=linux-headers-6.10.0-061000rc2_6.10.0-061000rc2.202406022333_all.deb +KERNEL_HEADERS_GENERIC=linux-headers-6.10.0-061000rc2-generic_6.10.0-061000rc2.202406022333_amd64.deb +KERNEL_MODULES=linux-modules-6.10.0-061000rc2-generic_6.10.0-061000rc2.202406022333_amd64.deb +KERNEL_IMAGE=linux-image-unsigned-6.10.0-061000rc2-generic_6.10.0-061000rc2.202406022333_amd64.deb + +############# CHECKS ################################## +# Check to ensure key URLs are accessible +URLS=( +"https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/" +"https://www.xilinx.com/bin/public/openDownload?filename=pynqMLIR_AIE_py312_v0.9.tar.gz" +"https://www.xilinx.com/bin/public/openDownload?filename=Riallto-v1.1.zip" +"https://github.com/amd/xdna-driver.git" +"https://github.com/AMDResearch/Riallto.git" +"https://docker.io/library/ubuntu" +) + +for URL in "${URLS[@]}"; do + if curl --output /dev/null --silent --head --fail "$URL"; then + echo "[CHECK OK] URL is reachable: $URL" + else + echo "WARNING: [CHECK FAILED] URL is not reachable: $URL" + echo "Some parts of the installation might not work correctly." + while true; do + read -p "Are you happy to continue? [Y/N] " answer + case $answer in + [Yy]* ) echo "Continuing..."; break;; + [Nn]* ) echo "Exiting"; exit 1;; + * ) echo "Please chose Y or N.";; + esac + done + + fi +done + +# Check that we are on Ubuntu24.04 +distro_info=$(lsb_release -d) +if [[ $distro_info != *"Ubuntu 24.04"* ]]; then + echo "Riallto is only currently supported on Ubuntu 24.04" + exit 1 +fi + +# Check that docker is installed +if command -v docker >/dev/null 2>&1; then + echo "Docker has been found." +else + echo "Docker could not be found on this system." + echo "Unable to continue the installation." 
+ echo "Please configure docker using the instructions found here:" + echo "https://docs.docker.com/engine/install/ubuntu/" + echo "And then rerun the script" + exit 1 +fi + +############### License file check ################### +# Check to make sure that a license file has been provided and that +# a MAC address can be extracted from it for adding into the docker +# image +if [ "$#" -ne 1 ]; then + echo "Usage $0 " + exit 1 +fi +LIC_FILE="$1" + +# Check to make sure that the license file exists +if [ ! -f "$LIC_FILE" ]; then + echo "Unable to open the license file $LIC_FILE" + exit 1 +fi + +MAC=$(grep -oP 'HOSTID=\K[^;]+' $1 | head -n1 | sed 's/\(..\)/\1:/g; s/:$//') +echo "Found a License file associated with MAC address $MAC" +#################################################### + + +######### Kernel and NPU driver check / install ########### +# Check to see if the kernel version and NPU driver is already installed +build_xrt=0 +kernel_version=$(uname -r) + +if [[ "$kernel_version" == "$REQUIRED_KERNEL_VERSION" ]]; then + echo "Kernel version is okay, is NPU available?" +else + echo "To install Riallto requires upgrading your kernel to ${REQUIRED_KERNEL_VERSION}" + echo "WARNING: This can be quite disruptive to your system configuration." + echo "After upgrading you will have to restart your machine and rerun this script" + while true; do + read -p "Are you happy to continue? [Y/N] " answer + case $answer in + [Yy]* ) echo "You chose yes, attempting to update kernel"; break;; + [Nn]* ) echo "Exiting"; exit 1;; + * ) echo "Please chose Y or N.";; + esac + done + + # First check to make sure that secure boot is disabled. + if mokutil --sb-state | grep -q "enabled"; then + echo "Secure boot is currently enabled." + echo "To install Riallto on Linux currently requires a" + echo "non-mainline kernel version ${REQUIRED_KERNEL_VERSION}." 
+ echo "If you would like to continue with the installation " + echo "please disable secure boot in your bios settings and rerun this script." + exit 1 + fi + + _kbump_dir=$(mktemp -d) + + wget -P ${_kbump_dir} https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_HEADERS_GENERIC + wget -P ${_kbump_dir} https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_HEADERS + wget -P ${_kbump_dir} https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_IMAGE + wget -P ${_kbump_dir} https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_MODULES + + pushd $_kbump_dir/ + sudo dpkg -i $KERNEL_HEADERS + sudo dpkg -i $KERNEL_HEADERS_GENERIC + sudo dpkg -i $KERNEL_MODULES + sudo dpkg -i $KERNEL_IMAGE + popd + echo -e "\033[31mPlease now restart your machine and rerun the script.\033[0m" + exit 1 +fi + +if [ -f "./xdna-driver-builder/${DRIVER_TARBALL}" ]; then + echo "NPU driver is available, just setting up Riallto" + build_xrt=0; +else + build_xrt=1 +fi + +if [ $build_xrt -eq 1 ]; then + # Building the NPU driver version and installing it + + if [ ! -f "./xdna-driver-builder/${DRIVER_TARBALL}" ]; then + echo "xdna-driver-builder/${DRIVER_TARBALL} is missing, building it from scratch" + pushd xdna-driver-builder + ./build.sh + popd + else + echo "Driver tarball already exists." + fi + +fi + +# Build the NPU drivers (xdna-driver) +if [ ! 
-f "${NPU_FIRMWARE}" ]; then + npu_install_tmp_dir=$(mktemp -d) + tar -xzvf "./xdna-driver-builder/${DRIVER_TARBALL}" -C "${npu_install_tmp_dir}" + pushd $npu_install_tmp_dir/root/debs + sudo -E dpkg -i xrt_*-amd64-xrt.deb || true + sudo -E dpkg -i xrt_plugin*-amdxdna.deb || true + sudo apt -y --fix-broken install + popd +fi +######################################################### + +########### Riallto Docker image construction ########### +echo "Building Riallto docker image" +build_tmp=./_work +rm -rf $build_tmp +mkdir -p $build_tmp + +USER_ID=`id -u` +GROUP_ID=`id -g` +GROUP_NAME=`id -g -n` + +## Checks to make sure that all the required tarballs and license are in the directory +if [ ! -f "./pynqMLIR-AIE.tar.gz" ]; then + echo "Error! pynqMLIR-AIE.tar.gz is missing, downloading from opendownloads..." + wget -O $build_tmp/pynqMLIR-AIE.tar.gz https://www.xilinx.com/bin/public/openDownload?filename=pynqMLIR_AIE_py312_v0.9.tar.gz +else + cp pynqMLIR-AIE.tar.gz $build_tmp +fi + +if [ ! -f "./xilinx_tools.tar.gz" ]; then + echo "xilinx_tools.tar.gz is missing, downloading it from opendownloads..." + wget -O $build_tmp/riallto_installer.zip https://www.xilinx.com/bin/public/openDownload?filename=Riallto-v1.1.zip + pushd $build_tmp + unzip riallto_installer.zip + mv Riallto-v1.1/Riallto/downloads/xilinx_tools_latest.tar.gz ./xilinx_tools.tar.gz + popd + cp $build_tmp/Riallto-v1.1/eula.txt ./ +else + cp xilinx_tools.tar.gz $build_tmp +fi + +while true; do + read -p "Do you agree to the terms in ./eula.txt and wish to proceed [y/n]? 
" answer + case $answer in + [Yy]* ) echo "Terms accepted"; break;; + [Nn]* ) echo "Exiting"; exit 1;; + * ) echo "Please chose Y or N.";; + esac +done + +cp $LIC_FILE $build_tmp/Xilinx.lic + +tar -xzvf ./xdna-driver-builder/${DRIVER_TARBALL} -C $build_tmp/ + +docker build \ + --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \ + --build-arg LIC_MAC=$MAC \ + --build-arg USER_ID=${USER_ID} \ + --build-arg GROUP_ID=${GROUP_ID} \ + --build-arg GROUP_NAME=${GROUP_NAME} \ + --build-arg BUILD_TEMPDIR=$build_tmp \ + -t riallto:latest \ + ./ + +rm -rf $build_tmp +##################################################### + diff --git a/scripts/linux/xdna-driver-builder/Dockerfile b/scripts/linux/xdna-driver-builder/Dockerfile new file mode 100644 index 0000000..3f1fe73 --- /dev/null +++ b/scripts/linux/xdna-driver-builder/Dockerfile @@ -0,0 +1,52 @@ +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +FROM ubuntu:24.04 as base + +ARG DEBIAN_FRONTEND=noninteractive +ARG KERNEL_HEADERS +ARG KERNEL_HEADERS_GENERIC +ARG KERNEL_MODULES +ARG KERNEL_IMAGE + +RUN apt-get update +RUN apt-get install -y vim git +RUN apt-get install -y flex bison + +RUN git config --global http.postBuffer 157286400 +RUN apt-get install make + +RUN apt-get install -y dpkg-dev +RUN apt-get install -y bc debhelper rsync kmod cpio libssl-dev:native +RUN apt-get install -y libelf-dev +RUN apt-get install -y zstd + +COPY _work /root/ + +RUN apt-get install -y libelf1t64 +RUN apt-get install -y kmod +RUN apt-get install -y linux-base + +RUN dpkg -i /root/$KERNEL_HEADERS +RUN dpkg -i /root/$KERNEL_HEADERS_GENERIC +RUN dpkg -i /root/$KERNEL_MODULES +RUN dpkg -i /root/$KERNEL_IMAGE + +RUN git config --global url."https://github.com/".insteadOf "git@github.com:" +RUN cd /root && git clone https://github.com/amd/xdna-driver.git --recursive +RUN cd /root/xdna-driver && git checkout 7682e0b4b6c435d6700faef3dc27cd709324e27f + +RUN apt-get install -y cmake jq 
pkg-config wget libdrm-dev +RUN apt-get install -y python3-pip +RUN apt-get install -y ocl-icd-opencl-dev + +RUN /root/xdna-driver/xrt/src/runtime_src/tools/scripts/xrtdeps.sh -docker + +RUN mkdir /root/debs +RUN cd /root/xdna-driver/build && ./build.sh -release +RUN cd /root/xdna-driver/build && ./build.sh -package + +RUN cd /root/xdna-driver/xrt/build && ./build.sh -noert + +RUN cp /root/xdna-driver/build/Release/xrt_plugin*.deb /root/debs/ +RUN cp /root/xdna-driver/xrt/build/Release/xrt*xrt.deb /root/debs/ diff --git a/scripts/linux/xdna-driver-builder/README.md b/scripts/linux/xdna-driver-builder/README.md new file mode 100644 index 0000000..fef3cf4 --- /dev/null +++ b/scripts/linux/xdna-driver-builder/README.md @@ -0,0 +1,42 @@ +# XDNA Linux NPU driver builder +A docker-based script that will recompile the kernel/xrt/xrt_plugin for the NPU device on linux. +It will produce a tarball containing all the debian files needed to setup the system. + +### To build the drivers +Run the following command to build the tarball with the debs. +```bash +./build.sh +``` + +This will take a while. +The expected output is a `ubuntu24.04_npu_drivers.tar.gz` tarball. + +### Setting up your system + +First disable secure boot from the bios of your system. + +Extract the tarball, and update the kernel. +``` +tar -xzvf ubuntu24.04_npu_drivers.tar.gz +sudo dpkg -i ./root/debs/linux-headers-*_amd64.deb +sudo dpkg -i ./root/debs/linux-image-*_amd64.deb +sudo dpkg -i ./root/debs/linux-libc-*_amd64.deb +``` + +Once that has completed restart your machine. + + +Then install XRT and XRT plugin: +``` +sudo dpkg -i ./root/debs/xrt_*xrt.deb +sudo dpkg -i ./root/debs/xrt_plugin.*-amdxdna.deb +``` + +### FAQ + +* If you get the following error on boot: +``` +error: bad shim signature +``` +This means that secure boot has not been disabled from the machine and it cannot run the necessary kernel version. 
+ diff --git a/scripts/linux/xdna-driver-builder/build.sh b/scripts/linux/xdna-driver-builder/build.sh new file mode 100755 index 0000000..4cd203c --- /dev/null +++ b/scripts/linux/xdna-driver-builder/build.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +KERNEL_HEADERS=linux-headers-6.10.0-061000rc2_6.10.0-061000rc2.202406022333_all.deb +KERNEL_HEADERS_GENERIC=linux-headers-6.10.0-061000rc2-generic_6.10.0-061000rc2.202406022333_amd64.deb +KERNEL_MODULES=linux-modules-6.10.0-061000rc2-generic_6.10.0-061000rc2.202406022333_amd64.deb +KERNEL_IMAGE=linux-image-unsigned-6.10.0-061000rc2-generic_6.10.0-061000rc2.202406022333_amd64.deb + +rm -rf _work +mkdir -p _work +wget -P _work https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_HEADERS_GENERIC +wget -P _work https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_HEADERS +wget -P _work https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_IMAGE +wget -P _work https://kernel.ubuntu.com/mainline/v6.10-rc2/amd64/$KERNEL_MODULES + +# Build a container that creates the appropriate linux kernel version +docker build \ + -t xdna_deb_builder:latest \ + --build-arg KERNEL_HEADERS=$KERNEL_HEADERS \ + --build-arg KERNEL_HEADERS_GENERIC=$KERNEL_HEADERS_GENERIC \ + --build-arg KERNEL_MODULES=$KERNEL_MODULES \ + --build-arg KERNEL_IMAGE=$KERNEL_IMAGE \ + ./ + +docker kill xdna_deb_builder_container || true + +# Lauch an image with that container +docker run -dit --rm --name xdna_deb_builder_container \ + -v $(pwd):/workspace \ + -w /workspace/ \ + xdna_deb_builder:latest \ + /bin/bash + +docker exec xdna_deb_builder_container bash -c "tar -zcvf driver.tar.gz /root/debs && mv driver.tar.gz /workspace/ubuntu24.04_npu_drivers.tar.gz" + +## cleanup +docker kill xdna_deb_builder_container || true +docker image rm --force xdna_deb_builder:latest diff --git a/scripts/utils/setup_onnx.ps1 b/scripts/utils/setup_onnx.ps1 index 
ede4f16..aa88c36 100644 --- a/scripts/utils/setup_onnx.ps1 +++ b/scripts/utils/setup_onnx.ps1 @@ -17,9 +17,9 @@ function Get-Wheels() { Write-Host "Downloading ONNX Runtime wheels" try { - Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/56c064821408cf1f7fe3035a303cb3982b6be903/demo/cloud-to-client/wheels/onnxruntime_vitisai-1.15.1-cp39-cp39-win_amd64.whl" -OutFile "$wheelDest\onnxruntime_vitisai-1.15.1-cp39-cp39-win_amd64.whl" - Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/56c064821408cf1f7fe3035a303cb3982b6be903/demo/cloud-to-client/wheels/voe-0.1.0-cp39-cp39-win_amd64.whl" -OutFile "$wheelDest\voe-0.1.0-cp39-cp39-win_amd64.whl" - Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/944ffbbc3a3031cf70871d5a3147490f6a2f24f4/tutorial/RyzenAI_quant_tutorial/onnx_example/pkgs/vai_q_onnx-1.16.0+60e82ab-py2.py3-none-any.whl" -OutFile "$wheelDest\vai_q_onnx-1.16.0+60e82ab-py2.py3-none-any.whl" + Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/c9d3db1418c0f7ae15a617fa0b79f12d8dbf6e24/demo/cloud-to-client/wheels/onnxruntime_vitisai-1.15.1-cp39-cp39-win_amd64.whl" -OutFile "$wheelDest\onnxruntime_vitisai-1.15.1-cp39-cp39-win_amd64.whl" + Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/c9d3db1418c0f7ae15a617fa0b79f12d8dbf6e24/demo/cloud-to-client/wheels/voe-0.1.0-cp39-cp39-win_amd64.whl" -OutFile "$wheelDest\voe-0.1.0-cp39-cp39-win_amd64.whl" + Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/c9d3db1418c0f7ae15a617fa0b79f12d8dbf6e24/tutorial/RyzenAI_quant_tutorial/onnx_example/pkgs/vai_q_onnx-1.16.0+60e82ab-py2.py3-none-any.whl" -OutFile "$wheelDest\vai_q_onnx-1.16.0+60e82ab-py2.py3-none-any.whl" } catch { Write-Host "Failed to download ONNX Runtime wheels: $_" } @@ -89,8 +89,8 @@ function Get-Binaries() { try { # Download xclbins and jsons - Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/56c064821408cf1f7fe3035a303cb3982b6be903/demo/cloud-to-client/xclbin/1x4.xclbin" -OutFile 
"$xclbinDest\1x4.xclbin" - Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/56c064821408cf1f7fe3035a303cb3982b6be903/demo/cloud-to-client/models/vaip_config.json" -OutFile "$xclbinDest\vaip_config.json" + Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/c9d3db1418c0f7ae15a617fa0b79f12d8dbf6e24/demo/cloud-to-client/xclbin/1x4.xclbin" -OutFile "$xclbinDest\1x4.xclbin" + Invoke-WebRequest -Uri "https://github.com/amd/RyzenAI-SW/raw/c9d3db1418c0f7ae15a617fa0b79f12d8dbf6e24/demo/cloud-to-client/models/vaip_config.json" -OutFile "$xclbinDest\vaip_config.json" } catch { Write-Host "Failed to download xclbins from https://github.com/amd/RyzenAI-SW: $_" diff --git a/scripts/utils/setup_python.ps1 b/scripts/utils/setup_python.ps1 index 2514bd5..e738974 100644 --- a/scripts/utils/setup_python.ps1 +++ b/scripts/utils/setup_python.ps1 @@ -62,7 +62,7 @@ function Install-Python($version) { # Install Python without UI try { - Start-Process -FilePath "$env:TEMP\python-$version-amd64.exe" -ArgumentList "/passive InstallAllUsers=0 PrependPath=1 Include_launcher=1" -Wait -NoNewWindow + Start-Process -FilePath "$env:TEMP\python-$version-amd64.exe" -ArgumentList "/passive InstallAllUsers=1 PrependPath=1 Include_launcher=1" -Wait -NoNewWindow } catch { Write-Host "Failed to install Python, make sure you are running this as administrator or install Python manually" -ForegroundColor red -BackgroundColor black } diff --git a/scripts/utils/upgrade_ryzenaisw.ps1 b/scripts/utils/upgrade_ryzenaisw.ps1 new file mode 100644 index 0000000..4db3c5b --- /dev/null +++ b/scripts/utils/upgrade_ryzenaisw.ps1 @@ -0,0 +1,59 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +param ( + [string]$zipPath +) + +& uninstall_onnx.ps1 + +# Enter venv +$venvPath = Join-Path -Path $PSScriptRoot -ChildPath "activate_venv.ps1" +. 
$venvPath + +# Unzip and cd into the RyzenAI-SW package +$zipFolderName = [System.IO.Path]::GetFileNameWithoutExtension($zipPath) +if (-Not (Test-Path $zipFolderName)) { + Expand-Archive -Path $zipPath -DestinationPath . +} +cd ".\$zipFolderName" + +# Install the required Python packages +try { + py -m pip install .\voe-4.0-win_amd64\onnxruntime_vitisai-1.15.1-cp39-cp39-win_amd64.whl + py -m pip install .\voe-4.0-win_amd64\voe-0.1.0-cp39-cp39-win_amd64.whl + py -m pip install .\vai_q_onnx-1.16.0+69bc4f2-py2.py3-none-any.whl +} catch { + Write-Output "Failed to install RyzenAI-SW wheels" + Exit 1 +} + +# Our python env site-packages directory +$siteDir = (py -m pip show pip | Select-String "Location:").Line.Split(" ")[1] + +# Copy the DLL files required by ONNX Runtime capi +$dllFiles = @( + "C:\Windows\System32\AMD\xrt_core.dll", + "C:\Windows\System32\AMD\xrt_coreutil.dll", + "C:\Windows\System32\AMD\amd_xrt_core.dll", + "C:\Windows\System32\AMD\xdp_ml_timeline_plugin.dll", + "C:\Windows\System32\AMD\xdp_core.dll" +) + +try { + foreach ($dll in $dllFiles) { + Copy-Item -Path $dll -Destination "$siteDir\onnxruntime\capi\" + } +} catch { + Write-Output "Failed to copy dlls to $siteDir\onnxruntime\capi" +} + + +# Copy xclbin and config to Riallto notebooks folder +$destinationDir = "C:\users\$Env:UserName\AppData\Roaming\riallto_notebooks\onnx\xclbins" +try { + Copy-Item -Path .\voe-4.0-win_amd64\vaip_config.json -Destination $destinationDir + Copy-Item -Path .\voe-4.0-win_amd64\1x4.xclbin -Destination $destinationDir +} catch { + Write-Output "Failed to copy files to $destinationDir" +} diff --git a/scripts/wsl/create_wsl_distro.ps1 b/scripts/wsl/create_wsl_distro.ps1 index 7f06bdf..aada69a 100644 --- a/scripts/wsl/create_wsl_distro.ps1 +++ b/scripts/wsl/create_wsl_distro.ps1 @@ -42,6 +42,15 @@ function New-Ubuntu { } } + # Confirm download succeeded + Write-Output "Checking if $downloadPath\ubuntuLTS.appx successfully downloaded" + if (-Not ((Test-Path -Path 
$downloadPath\ubuntuLTS.appx) -Or (Test-Path -Path $downloadPath\ubuntuLTS.zip))) { + Write-Output "Could not find downloaded Ubuntu WSL image" + Write-Output "ubuntuLTS.appx or ubuntuLTS.zip not found in $downloadPath" + Write-Output "Please make sure you have access to https://aka.ms/wslubuntu2004" + Exit 1 + } + try { Write-Output "Creating new Ubuntu WSL instance" if (-Not (Test-Path -Path $downloadPath\ubuntuLTS.zip)){ diff --git a/setup.py b/setup.py index 7ef6941..22e0599 100644 --- a/setup.py +++ b/setup.py @@ -15,10 +15,21 @@ """ from setuptools import find_packages, setup +import platform + +# Windows and linux have different bindings version, so we need to add +# the appropriate constraint based on the platform. +required_python_version = "" +if platform.system() == 'Linux': + required_python_version = "3.12.*" +elif platform.system() == 'Windows': + required_python_version = "3.9.*" +else: + raise OSError(f'Unknown Operating System: {platform.os.name} {platform.system()}') setup( name="npu", - version='1.0', + version='1.1', package_data={ '': ['*.py', '*.pyd', '*.so', '*.dll', 'Makefile', '.h', '.cpp', 'tests/*', @@ -39,7 +50,7 @@ 'lib/applications/binaries/*'], }, packages=find_packages(), - python_requires="==3.9.*", + python_requires=f"=={required_python_version}", install_requires=[ "numpy<2.0", "pytest", @@ -49,6 +60,7 @@ "CppHeaderParser", "jupyterlab", "ipywidgets", - "pillow>=10.0.0" + "pillow>=10.0.0", + "ml_dtypes" ], - description="Riallto is a simple framework for programming and interacting with the AMD IPU device.") + description="Riallto is a simple framework for programming and interacting with the AMD NPU device.") diff --git a/tests/test_applications.py b/tests/test_applications.py index c2c7289..1fc67ba 100644 --- a/tests/test_applications.py +++ b/tests/test_applications.py @@ -9,6 +9,7 @@ from pathlib import Path from npu.utils.test_device import get_device_status, reset_npu from npu.utils.xbutil import XBUtil +import platform from 
npu.build.appbuilder import AppBuilder from npu.build.kernelbuilder import KernelObjectBuilder @@ -296,13 +297,14 @@ def _test_appbuild(app, *args): def check_npu(): """Utility to check that the IPU is available before a runtime test runs, otherwise skip test.""" - if not get_device_status() == "OK": - pytest.skip('Skipping test because the IPU device is not enabled on this device.') - xbu = XBUtil() - for app in xbu.list_apps(): - appname = list(app.keys())[0] - if appname.endswith("IPURiallto"): - pytest.skip('Skipping test because the IPU is in an unstable state.') + if platform.system() == 'Windows': + if not get_device_status() == "OK": + pytest.skip('Skipping test because the IPU device is not enabled on this device.') + xbu = XBUtil() + for app in xbu.list_apps(): + appname = list(app.keys())[0] + if appname.endswith("IPURiallto"): + pytest.skip('Skipping test because the IPU is in an unstable state.') def reset_device(): @@ -324,15 +326,16 @@ def manage_testing(request): Also save all the test specific artifacts to a given location.""" yield - test_name = request.node.name - os.makedirs(Path('logs'), exist_ok=True) - logdir = Path(f'logs/{test_name}') - if os.path.exists(logdir) and os.path.isdir(logdir): - shutil.rmtree(logdir) - os.makedirs(logdir, exist_ok=True) - for f in Path.cwd().glob('*.xclbin'): - shutil.move(f, logdir) - for f in Path.cwd().glob('*.seq'): - shutil.move(f, logdir) - for f in Path.cwd().glob('*.mlir'): - shutil.move(f, logdir) + if platform.system() == 'Windows': + test_name = request.node.name + os.makedirs(Path('logs'), exist_ok=True) + logdir = Path(f'logs/{test_name}') + if os.path.exists(logdir) and os.path.isdir(logdir): + shutil.rmtree(logdir) + os.makedirs(logdir, exist_ok=True) + for f in Path.cwd().glob('*.xclbin'): + shutil.move(f, logdir) + for f in Path.cwd().glob('*.seq'): + shutil.move(f, logdir) + for f in Path.cwd().glob('*.mlir'): + shutil.move(f, logdir) diff --git a/tests/test_multi_load_app.py 
b/tests/test_multi_load_app.py index fd11463..ffb0cc3 100644 --- a/tests/test_multi_load_app.py +++ b/tests/test_multi_load_app.py @@ -4,6 +4,8 @@ import pytest import numpy as np import os +import platform + from .test_applications import check_npu from npu.runtime import AppRunner from npu.utils.xbutil import XBUtil @@ -16,6 +18,15 @@ def _get_full_path(xclbin: str = None) -> str: return os.path.abspath(os.path.join(binaries, xclbin)) + +def _xbutil(appcount): + state = True + if platform.system() == 'Windows': + appsreport = XBUtil() + state = appsreport.app_count == appcount + del appsreport + return state + def test_double_load_custom_app(): """Tests loading two applications with the same name/UUID simultaneously""" check_npu() @@ -29,9 +40,9 @@ def test_double_load_custom_app(): assert app app1 = AppRunner("SimplePlusN.xclbin") assert app1 - appsreport = XBUtil() - assert appsreport.app_count == 2 - del app, app1, appsreport + + assert _xbutil(2) + del app, app1 @pytest.mark.parametrize('numappsreport', [2, 3, 4]) @@ -43,14 +54,15 @@ def test_videoapp_n_loads(numappsreport): for _ in range(numappsreport): app.append(AppRunner(appbin)) - appsreport = XBUtil() - assert appsreport.app_count == numappsreport + assert _xbutil(numappsreport) for i in range(numappsreport): assert app[i] - del app, appsreport + del app +@pytest.mark.skipif(platform.system() == 'Linux', + reason="Skip because we don't get app_count in Linux") def test_videoapp_five_loads(): """Load five instances of the same app. 
AppRunner should return a RuntimeError indicating not enough space @@ -64,12 +76,11 @@ def test_videoapp_five_loads(): for i in range(4): assert app[i] - appsreport = XBUtil() - assert appsreport.app_count == 4 + assert _xbutil(4) with pytest.raises(RuntimeError) as verr: app1 = AppRunner(appbin) del app1 assert 'There is currently no free space on the NPU' in str(verr.value) - - del app, appsreport + + del app diff --git a/tests/test_onnx.py b/tests/test_onnx.py index 9767952..4e8b7e1 100644 --- a/tests/test_onnx.py +++ b/tests/test_onnx.py @@ -1,102 +1,102 @@ -import torch -import torch.nn as nn -import torch.optim as optim -import torchvision -import torchvision.transforms as transforms -from onnxruntime.quantization import CalibrationDataReader, QuantType, QuantFormat - -import os -import pytest - -@pytest.mark.skip(reason="Skipping to ease load on CI.") -def test_quantization(): - - torch.manual_seed(1337) - - class MLP(nn.Module): - def __init__(self): - super(MLP, self).__init__() - self.fc1 = nn.Linear(28 * 28, 32) - self.relu = nn.ReLU() - self.fc2 = nn.Linear(32, 10) - - def forward(self, x): - x = x.view(-1, 28 * 28) # Flatten the input - x = self.fc1(x) - x = self.relu(x) - x = self.fc2(x) - return x - - transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]) - testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform) - testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) - - model = MLP() - criterion = nn.CrossEntropyLoss() - optimizer = optim.Adam(model.parameters(), lr=3e-4) - - # Training loop - for epoch in range(5): - running_loss = 0.0 - for data in testloader: - inputs, labels = data - optimizer.zero_grad() - - # Forward pass - outputs = model(inputs) - loss = criterion(outputs, labels) - - # Backward pass - loss.backward() - optimizer.step() - print(f"Final loss: {loss.item()}") - - try: - import vai_q_onnx - except: - raise 
ImportError("Failed to impot vai_q_onnx") - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} - - input_tensor = torch.randn(1,1,28,28) - - torch.onnx.export( - model, - input_tensor, - "mnist.onnx", - export_params=True, - opset_version=13, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - ) - - class MNISTCalibrationDataReader(CalibrationDataReader): - def __init__(self, batch_size: int = 64): - super().__init__() - self.iterator = iter(testloader) - - def get_next(self) -> dict: - try: - images, labels = next(self.iterator) - return {"input": images.numpy()} - except Exception: - return None - - dr = MNISTCalibrationDataReader() - - vai_q_onnx.quantize_static( - "mnist.onnx", - "mnist_quanized.onnx", - dr, - quant_format=QuantFormat.QDQ, - calibrate_method=vai_q_onnx.PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_dpu=True, - extra_options={'ActivationSymmetric': True} - ) - - assert os.path.isfile("mnist_quanized.onnx") +#import torch +#import torch.nn as nn +#import torch.optim as optim +#import torchvision +#import torchvision.transforms as transforms +#from onnxruntime.quantization import CalibrationDataReader, QuantType, QuantFormat +# +#import os +#import pytest +# +#@pytest.mark.skip(reason="Skipping to ease load on CI.") +#def test_quantization(): +# +# torch.manual_seed(1337) +# +# class MLP(nn.Module): +# def __init__(self): +# super(MLP, self).__init__() +# self.fc1 = nn.Linear(28 * 28, 32) +# self.relu = nn.ReLU() +# self.fc2 = nn.Linear(32, 10) +# +# def forward(self, x): +# x = x.view(-1, 28 * 28) # Flatten the input +# x = self.fc1(x) +# x = self.relu(x) +# x = self.fc2(x) +# return x +# +# transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]) +# testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, 
transform=transform) +# testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) +# +# model = MLP() +# criterion = nn.CrossEntropyLoss() +# optimizer = optim.Adam(model.parameters(), lr=3e-4) +# +# # Training loop +# for epoch in range(5): +# running_loss = 0.0 +# for data in testloader: +# inputs, labels = data +# optimizer.zero_grad() +# +# # Forward pass +# outputs = model(inputs) +# loss = criterion(outputs, labels) +# +# # Backward pass +# loss.backward() +# optimizer.step() +# print(f"Final loss: {loss.item()}") +# +# try: +# import vai_q_onnx +# except: +# raise ImportError("Failed to impot vai_q_onnx") +# +# input_names = ['input'] +# output_names = ['output'] +# dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} +# +# input_tensor = torch.randn(1,1,28,28) +# +# torch.onnx.export( +# model, +# input_tensor, +# "mnist.onnx", +# export_params=True, +# opset_version=13, +# input_names=input_names, +# output_names=output_names, +# dynamic_axes=dynamic_axes, +# ) +# +# class MNISTCalibrationDataReader(CalibrationDataReader): +# def __init__(self, batch_size: int = 64): +# super().__init__() +# self.iterator = iter(testloader) +# +# def get_next(self) -> dict: +# try: +# images, labels = next(self.iterator) +# return {"input": images.numpy()} +# except Exception: +# return None +# +# dr = MNISTCalibrationDataReader() +# +# vai_q_onnx.quantize_static( +# "mnist.onnx", +# "mnist_quanized.onnx", +# dr, +# quant_format=QuantFormat.QDQ, +# calibrate_method=vai_q_onnx.PowerOfTwoMethod.MinMSE, +# activation_type=QuantType.QUInt8, +# weight_type=QuantType.QInt8, +# enable_dpu=True, +# extra_options={'ActivationSymmetric': True} +# ) +# +# assert os.path.isfile("mnist_quanized.onnx") diff --git a/tests/test_visualization.py b/tests/test_visualization.py index deb118d..d261103 100644 --- a/tests/test_visualization.py +++ b/tests/test_visualization.py @@ -4,25 +4,40 @@ from pathlib import Path import pytest import numpy as np 
+from npu.build.kernel import Kernel from npu.build.mtkernel import MTPassThrough, MTSplit, MTConcat from npu.build.appbuilder import AppBuilder from npu.lib.graphs.graph_1ct import RGB720pBuilder from npu.lib import Rgba2Hue, Rgba2Gray, Gray2Rgba, BitwiseAnd from npu.lib import InRange, RgbaRtpThres, ThresholdRgba +import re +import random +import string imgdir = str(Path(__file__).parent / "images") + '/' x_in = np.zeros(shape=(720, 1280, 4), dtype=np.uint8) x_out = np.zeros(shape=(720, 1280, 4), dtype=np.uint8) +def _count_class_occurrences(svgfile, classname): + pattern = re.compile(f'class="{classname}"') + with open(svgfile, 'r', encoding='utf-8') as file: + content = file.read() + matches = pattern.findall(content) + return len(matches) + + @pytest.mark.parametrize('kernel', [RgbaRtpThres, ThresholdRgba]) def test_RGB720pBuilder(kernel): app_builder = RGB720pBuilder(kernel=kernel()) - app_builder.save(f'{imgdir}RGB720pBuilder{str(kernel().name)}.svg') + svgfile = f'{imgdir}RGB720pBuilder{str(kernel().name)}.svg' + app_builder.save(svgfile) + assert _count_class_occurrences(svgfile, 'kernel') == 2 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == 4 def test_single_kernel(): - class SingleKernel(AppBuilder): + class SingleKernelOneCTOneIT(AppBuilder): def __init__(self): self.kernel = RgbaRtpThres() super().__init__() @@ -33,9 +48,11 @@ def callgraph(self, x_in, x_out): 128, 128) x_out[t] = x - app_builder = SingleKernel() + app_builder = SingleKernelOneCTOneIT() _ = app_builder.to_metadata(x_in, x_out) - app_builder.save(f'{imgdir}single_ct_and_it.svg') + app_builder.save(svgfile := f'{imgdir}{app_builder.name}.svg') + assert _count_class_occurrences(svgfile, 'kernel') == 2 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == 4 ct2ct = [['non_neighbor_down', (0, 5), (0, 2)], @@ -63,7 +80,10 @@ def callgraph(self, x_in, x_out): app_builder = Pipeline() x_out1 = np.zeros(shape=(720, 1280), dtype=np.uint8) _ = 
app_builder.to_metadata(x_in, x_out1) - app_builder.save(imgdir + app[0] + '.svg') + app_builder.save((svgfile := imgdir + app_builder.name + '_' + app[0] + '.svg')) + assert _count_class_occurrences(svgfile, 'kernel') == 4 + aiebuff = 8 if 'non_' in app[0] else 6 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == aiebuff @pytest.mark.parametrize('down', [True, False]) @@ -91,9 +111,13 @@ def callgraph(self, x_in, x_out): x = self.bitwiseand(x, y, 1280*4) x_out[t] = x - app_bldr = ColorDetectApplication() - _ = app_bldr.to_metadata(x_in, x_out) - app_bldr.save(f"{imgdir}ColorDetectApplication_{('down' if down else 'up')}.svg") + app_builder = ColorDetectApplication() + _ = app_builder.to_metadata(x_in, x_out) + svgfile = f"{imgdir}{app_builder.name}_{('down' if down else 'up')}.svg" + app_builder.save(svgfile) + assert _count_class_occurrences(svgfile, 'kernel') == 8 + assert _count_class_occurrences(svgfile, 'mem_tile_buffers') == 2 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == 12 @pytest.mark.parametrize('scale', [1, 2, 4]) @@ -119,12 +143,14 @@ def callgraph(self, xin, xout): app_builder = ScaledUpThresholdApplication() _ = app_builder.to_metadata(x_in, x_out) - app_builder.save(f'{imgdir}ScaledUpThresholdApplication_x{scale}.svg') + app_builder.save(svgfile := f'{imgdir}{app_builder.name}_x{scale}.svg') + assert _count_class_occurrences(svgfile, 'kernel') == scale * 2 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == scale * 4 @pytest.mark.parametrize('dual', [True, False]) def test_mtpassthrough(dual): - class SimpleMemTileApplication(AppBuilder): + class SimpleMemTilePassthrough(AppBuilder): def __init__(self): self.mtbi = MTPassThrough() self.mtbo = MTPassThrough() @@ -140,9 +166,13 @@ def callgraph(self, x_in, x_out): x = self.mtbo(x) x_out[t] = x - app_builder = SimpleMemTileApplication() + app_builder = SimpleMemTilePassthrough() _ = app_builder.to_metadata(x_in, x_out) - 
app_builder.save(f"{imgdir}memtile_passthrough{('_dual' if dual else '')}.svg") + svgfile = f"{imgdir}{app_builder.name}{('_dual' if dual else '')}.svg" + app_builder.save(svgfile) + assert _count_class_occurrences(svgfile, 'kernel') == 2 + assert _count_class_occurrences(svgfile, 'mem_tile_buffers') == 2 + 2 * dual + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == 4 def test_mixed_kernels_scaledup(): @@ -158,7 +188,7 @@ def callgraph(self, xin, xout): for t in range(720): xs = self.split(xin[t]) for i in range(4): - if (i%2) == 0: + if (i % 2) == 0: xs[i] = self.ks0[i//2](xs[i], 1280) else: xs[i] = self.ks1[i//2](xs[i], 1280) @@ -168,7 +198,10 @@ def callgraph(self, xin, xout): x_out1 = np.zeros(shape=(720, 1280), dtype=np.uint8) app_builder = ScaledUpMixedKernelsApplication() _ = app_builder.to_metadata(x_in, x_out1) - app_builder.save(f'{imgdir}ScaledUpMixedKernelsApplication.svg') + app_builder.save(svgfile := f'{imgdir}{app_builder.name}.svg') + assert _count_class_occurrences(svgfile, 'kernel') == 8 + assert _count_class_occurrences(svgfile, 'mem_tile_buffers') == 16 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == 16 @pytest.mark.parametrize('tloc', ['up', 'down', 'nonneighboring']) @@ -203,12 +236,71 @@ def callgraph(self, xin, xout): for t in range(x_in.shape[0]): xs = self.split(xin[t]) for i in range(2): - size = x_in.shape[1]*x_in.shape[2] // 2 - xo[i] = self.ks0[i](xs[i], size) - xo[i] = self.ks1[i](xo[i], size//4) + size = x_in.shape[1]*x_in.shape[2] // 2 + xo[i] = self.ks0[i](xs[i], size) + xo[i] = self.ks1[i](xo[i], size//4) xout[t] = self.concat(xo) app_builder = ScaledUpDfPipelineApplication() _ = app_builder.to_metadata(x_in, x_out) - app_builder.save(f'{imgdir}ScaledUpDfPipelineApplication_{tloc}.svg') + app_builder.save(svgfile := f'{imgdir}{app_builder.name}_{tloc}.svg') + assert _count_class_occurrences(svgfile, 'kernel') == 8 + assert _count_class_occurrences(svgfile, 'mem_tile_buffers') == 8 + aiebuff = 16 
if tloc == 'nonneighboring' else 12 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == aiebuff + + +@pytest.mark.parametrize('randomname', [False, True]) +def test_dataparallel(randomname): + """Test a data parallel application with buffers with random names""" + kernel_src = ''' + #include + #define N 720 + extern "C" { + void passthrough(uint8_t *data_in, uint8_t *data_out) { + for(int i=0; i < N; i++) { + data_out[i] = data_in[i]; + } + } + } // extern "C" + ''' + + def _random_string(): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(16)) + + inname, outname = 'data_in', 'data_out' + if randomname: + inname, outname = _random_string(), _random_string() + kernel_src = kernel_src.replace('data_in', inname) + kernel_src = kernel_src.replace('data_out', outname) + + def setval_behavioral(obj): + objout = getattr(obj, outname) + objin = getattr(obj, inname) + objout.array = objin.array + + class DataParallelPassthrough(AppBuilder): + def __init__(self): + self.split = MTSplit(4) + self.concat = MTConcat() + self.ks = [Kernel(kernel_src, setval_behavioral) for _ in range(4)] + super().__init__() + + def callgraph(self, xin, xout): + for t in range(720): + xs = self.split(xin[t]) + for i in range(4): + xs[i] = self.ks[i](xs[i]) + x = self.concat(xs) + xout[t] = x + + app_builder = DataParallelPassthrough() + _ = app_builder.to_metadata(x_in, x_out) + svgfile = f'{imgdir}{app_builder.name}_{inname}_{outname}.svg' + app_builder.save(svgfile) + + assert _count_class_occurrences(svgfile, 'kernel') == 8 + assert _count_class_occurrences(svgfile, 'mem_tile_buffers') == 16 + assert _count_class_occurrences(svgfile, 'aie_tile_buffers') == 16