Skip to content

Commit

Permalink
upgrade IPEX runtime to r2.3 (wenet-e2e#2538)
Browse files Browse the repository at this point in the history
  • Loading branch information
ZailiWang authored Jun 5, 2024
1 parent fcb4b98 commit 509d05d
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 21 deletions.
23 changes: 13 additions & 10 deletions runtime/core/cmake/ipex.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
message(FATAL_ERROR "Intel Extension For PyTorch supports only Linux for now")
endif()

set(TORCH_VERSION "2.3.0")
set(IPEX_VERSION "2.3.0")

if(CXX11_ABI)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.1%2Bcpu.zip")
set(URL_HASH "SHA256=137a842d1cf1e9196b419390133a1623ef92f8f84dc7a072f95ada684f394afd")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip")
set(URL_HASH "SHA256=f60009d2a74b6c8bdb174e398c70d217b7d12a4d3d358cd1db0690b32f6e193b")
else()
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.0.1%2Bcpu.zip")
set(URL_HASH "SHA256=90d50350fd24ce5cf9dfbf47888d0cfd9f943eb677f481b86fe1b8e90f7fda5d")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip")
set(URL_HASH "SHA256=6b78aff4e586991bb2e040c02b2cfd73bc740059b9d12bcc1c1d7b3c86d2ab88")
endif()
FetchContent_Declare(libtorch
URL ${LIBTORCH_URL}
Expand All @@ -19,13 +22,13 @@ FetchContent_MakeAvailable(libtorch)
find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)

if(CXX11_ABI)
set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.100%2Bcpu.run")
set(URL_HASH "SHA256=f172d9ebc2ca0c39cc93bb395721194f79767e1bc3f82b13e1edc07d1530a600")
set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-cxx11-abi-2.0.100%2Bcpu.run")
set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-cxx11-abi-${IPEX_VERSION}%2Bcpu.run")
set(URL_HASH "SHA256=8aa3c7c37f5cc2cba450947ca04f565fccb86c3bb98f592142375cfb9016f0d6")
set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-cxx11-abi-${IPEX_VERSION}%2Bcpu.run")
else()
set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-2.0.100%2Bcpu.run")
set(URL_HASH "SHA256=8392f965dd9b8f6c0712acbb805c7e560e4965a0ade279b47a5f5a8363888268")
set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-2.0.100%2Bcpu.run")
set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-${IPEX_VERSION}%2Bcpu.run")
set(URL_HASH "SHA256=fecb6244a6cd38ca2d73a45272a6ad8527d1ec2caca512d919daa80adb621814")
set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-${IPEX_VERSION}%2Bcpu.run")
endif()
FetchContent_Declare(intel_ext_pt
URL ${LIBIPEX_URL}
Expand Down
6 changes: 3 additions & 3 deletions runtime/ipex/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(wenet VERSION 0.1)

option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF)
option(CXX11_ABI "whether to use CXX11_ABI libtorch" ON)
option(GRAPH_TOOLS "whether to build TLG graph tools" OFF)
option(BUILD_TESTING "whether to build unit test" ON)

Expand All @@ -21,7 +21,7 @@ set(FETCHCONTENT_BASE_DIR ${fc_base})

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Ofast -mavx2 -mfma -pthread -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Ofast -mavx2 -mfma -pthread -fPIC")

# Include all dependency
include(ipex)
Expand All @@ -30,7 +30,7 @@ include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/kaldi
)
include(wetextprocessing)
include(wetextprocessing)

# Build all libraries
add_subdirectory(utils)
Expand Down
22 changes: 16 additions & 6 deletions runtime/ipex/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## WeNet Server (x86) ASR Demo With Intel® Extension for PyTorch\* Optimization

[Intel® Extension for PyTorch\*](https://github.com/intel/intel-extension-for-pytorch) (IPEX) extends [PyTorch\*](https://pytorch.org/) with up-to-date optimization features for extra performance boost on Intel hardware. The optimizations take advantage of AVX-512, Vector Neural Network Instructions (AVX512 VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel X<sup>e</sup> Matrix Extensions (XMX) AI engines on Intel discrete GPUs.
[Intel® Extension for PyTorch\*](https://github.com/intel/intel-extension-for-pytorch) (IPEX) extends [PyTorch\*](https://pytorch.org/) with up-to-date optimization features for extra performance boost on Intel hardware. The optimizations take advantage of AVX-512, Vector Neural Network Instructions (AVX512 VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel X<sup>e</sup> Matrix Extensions (XMX) AI engines on Intel discrete GPUs.

The following sections describe how to accelerate WeNet model inference on Intel® CPU machines using Intel® Extension for PyTorch\*. This mainly involves exporting pretrained models with IPEX optimization and building the WeNet runtime executables with the IPEX C++ SDK. The executables can be built from local source code, or you can directly build and run a docker container in which the runtime binaries are ready.

Expand Down Expand Up @@ -39,7 +39,8 @@ docker run --rm -v $PWD/docker_resource:/home/wenet/runtime/ipex/docker_resource
```

* Step 4. Test in docker container
```

```sh
cd /home/wenet/runtime/ipex
export GLOG_logtostderr=1
export GLOG_v=2
Expand All @@ -57,15 +58,18 @@ model_dir=docker_resource/model
* Step 1. Environment Setup.

WeNet code cloning and default dependencies installation

``` sh
git clone https://github.com/wenet-e2e/wenet
cd wenet
pip install -r requirements.txt
```

Upgrading of PyTorch and TorchAudio, followed by the installation of IPEX

``` sh
pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu --force-reinstall
pip install intel_extension_for_pytorch==2.0.100
pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu --force-reinstall
pip install intel_extension_for_pytorch==2.3.0
```

Installation of related tools: Intel® OpenMP and TCMalloc
Expand All @@ -83,6 +87,7 @@ based on the package manager of your system.
* Step 3. Export the pretrained model with IPEX optimization.

For exporting FP32 runtime model

``` sh
source examples/aishell/s0/path.sh
export OMP_NUM_THREADS=1
Expand All @@ -91,7 +96,9 @@ python wenet/bin/export_ipex.py \
--checkpoint <model_ckpt_filename> \
--output_file <runtime_model_output_filename>
```

If you have an Intel® 4th Generation Xeon (Sapphire Rapids) server, you can export a BF16 runtime model and get better performance by virtue of [AMX instructions](https://en.wikipedia.org/wiki/Advanced_Matrix_Extensions).

``` sh
source examples/aishell/s0/path.sh
export OMP_NUM_THREADS=1
Expand All @@ -101,7 +108,9 @@ python wenet/bin/export_ipex.py \
--output_file <runtime_model_output_filename> \
--dtype bf16
```

And for exporting int8 quantized runtime model

``` sh
source examples/aishell/s0/path.sh
export OMP_NUM_THREADS=1
Expand Down Expand Up @@ -132,6 +141,7 @@ ipexrun --no-python \
--model_path $model_dir/<runtime_model_filename> \
--unit_path $model_dir/units.txt 2>&1 | tee log.txt
```
NOTE: Please refer [IPEX Launch Script Usage Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/2.0.100+cpu/tutorials/performance_tuning/launch_script.html) for usage of advanced features.

For advanced usage of WeNet, such as building Web/RPC/HTTP services, please refer [LibTorch Tutorial](../libtorch#advanced-usage). The difference is that the executables should be invoked via IPEX launch script `ipexrun`.
NOTE: Please refer to the [IPEX Launch Script Usage Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/2.3.0+cpu/tutorials/performance_tuning/launch_script.html) for usage of advanced features.

For advanced usage of WeNet, such as building Web/RPC/HTTP services, please refer to the [LibTorch Tutorial](../libtorch#advanced-usage). The difference is that the executables should be invoked via the IPEX launch script `ipexrun`.
4 changes: 2 additions & 2 deletions runtime/ipex/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y git cmake wget build-essential python-is-python3 python3-pip google-perftools
RUN pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu
RUN pip install intel_extension_for_pytorch==2.0.100 pyyaml six intel-openmp
RUN pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu
RUN pip install intel_extension_for_pytorch==2.3.0 pyyaml six intel-openmp
RUN ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 /usr/lib/x86_64-linux-gnu/libtcmalloc.so

RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet
Expand Down

0 comments on commit 509d05d

Please sign in to comment.