diff --git a/3rdparty/flashinfer b/3rdparty/flashinfer index 920672776a..04deac2101 160000 --- a/3rdparty/flashinfer +++ b/3rdparty/flashinfer @@ -1 +1 @@ -Subproject commit 920672776a2bf2244acf7a2e0516f46be9e93b15 +Subproject commit 04deac2101468251913fe8dbb480f54a6baf0a33 diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ecc6f0a6b..9fc91c1238 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -949,8 +949,6 @@ if (USE_FLASHINFER STREQUAL "ON") message(STATUS "Build with FlashInfer") set(FLASHINFER_TVM_BINDING ON) set(FLASHINFER_TVM_HOME ${PROJECT_SOURCE_DIR}) - set(FLASHINFER_ENABLE_FP8 OFF) - set(FLASHINFER_ENABLE_BF16 OFF) set(FLASHINFER_PREFILL OFF) set(FLASHINFER_DECODE OFF) set(FLASHINFER_PAGE OFF) diff --git a/cmake/config.cmake b/cmake/config.cmake index 9207204997..d28c208815 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -439,6 +439,18 @@ set(USE_GTEST AUTO) # Need to have USE_CUDA=ON set(USE_CUTLASS OFF) +# Whether to enable FlashInfer or not. +set(USE_FLASHINFER OFF) +# Options for FlashInfer kernel compilation. +set(FLASHINFER_ENABLE_FP8 OFF) +set(FLASHINFER_ENABLE_BF16 OFF) +set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8) +set(FLASHINFER_GEN_HEAD_DIMS 128) +set(FLASHINFER_GEN_KV_LAYOUTS 1) +set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1) +set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false") +set(FLASHINFER_GEN_CASUALS "false" "true") + # Enable to show a summary of TVM options set(SUMMARIZE OFF)