From 6aedfd317f320146f3b8e8af2d05a39c5b83576e Mon Sep 17 00:00:00 2001 From: Manupa Karunaratne Date: Thu, 23 Jan 2025 06:39:08 +0000 Subject: [PATCH] [NFC][Vectorization] Refactor vector size inference out of the pass (#19768) Currently, the vector length inference lives inside the generic vectorization pass. However, we need to infer vector lengths when setting layouts for LLVMGPUVectorDistribute. This currently happens prior to generic vectorization. Therefore, this commit refactors the inferSizesFromIR API into codegen utils to be able to generally use it where it is needed. Signed-off-by: Manupa Karunaratne --- .../Codegen/Common/GenericVectorization.cpp | 221 +----------------- .../src/iree/compiler/Codegen/Utils/Utils.cpp | 202 ++++++++++++++++ .../src/iree/compiler/Codegen/Utils/Utils.h | 31 +++ 3 files changed, 234 insertions(+), 220 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp index 1715c785c5e6..adb1798dc3e7 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Codegen/Common/Passes.h" #include "iree/compiler/Codegen/Common/TileSizeSelection.h" +#include "iree/compiler/Codegen/Utils/Utils.h" #include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" #include "mlir/Dialect/Linalg/Transforms/Hoisting.h" @@ -28,226 +29,6 @@ namespace mlir::iree_compiler { namespace { -struct VectorizationTileSizes { - SmallVector destShape; - SmallVector vectorSizes; - SmallVector vectorScalableFlags; -}; - -/// Returns a VectorizationTileSizes which contains the inferred bounded result -/// shape and vector input sizes. This is useful to infer the sizes from a -/// chain. 
-static std::optional inferSizesFromIR(Value val); - -/// Tries to infer the vector sizes from an IR using ValueBounds analysis. If -/// `opResult` is provided, it stores the bounded result shapes to destShape. -/// Returns std::nullopt if vector sizes can't be inferred. -static std::optional -inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { - LLVM_DEBUG({ - VEC_DBGS() << "Inferring sizes for:\n" << linalgOp; - if (opResult) { - VEC_DBGS() << " with OpResult.resultNumber=" - << opResult->getResultNumber(); - } - VEC_DBGS() << '\n'; - }); - - std::optional vscaleRange; - if (!opResult) { - // Note: Inferring scalable sizes is not supported is `opResult` is set - // (which is used to compute sizes for tensor.pack/unpack). - auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(linalgOp); - vscaleRange = getDefaultVscaleRange(targetAttr); - } - - VectorizationTileSizes result; - unsigned numDims = linalgOp.getNumLoops(); - for (int dim = 0; dim < numDims; ++dim) { - // Map dimension `dim` to an operand dimension that we will use to - // traverse the U-D chain to get `dim` vector size information. - SmallVector> operandDimPairs; - linalgOp.mapIterationSpaceDimToAllOperandDims(dim, operandDimPairs); - if (operandDimPairs.empty()) { - return std::nullopt; - } - - Value firstOperand = operandDimPairs[0].first; - unsigned firstOperandDim = operandDimPairs[0].second; - - // Trivial case: `dim` size is available in the operand type. - int64_t dimSize = llvm::cast(firstOperand.getType()) - .getShape()[firstOperandDim]; - bool dimScalable = false; - if (!ShapedType::isDynamic(dimSize)) { - result.vectorSizes.push_back(dimSize); - result.vectorScalableFlags.push_back(dimScalable); - LLVM_DEBUG(VEC_DBGS() << "Inferred iteration size '" << dimSize - << "' for dimension '" << dim << "'\n"); - continue; - } - - // Use ValueBounds analysis to infer `dim` size upper bound. 
- FailureOr maybeDimBound; - for (auto operandDimPair : operandDimPairs) { - Value operand = operandDimPair.first; - unsigned operandDim = operandDimPair.second; - maybeDimBound = computeDimUpperBound(operand, operandDim, vscaleRange, - RoundUpVscaleMultiple::Yes); - if (succeeded(maybeDimBound)) { - break; - } - } - - if (failed(maybeDimBound)) { - return std::nullopt; - } - - dimSize = maybeDimBound->baseSize; - dimScalable = maybeDimBound->scalable; - result.vectorSizes.push_back(dimSize); - result.vectorScalableFlags.push_back(dimScalable); - - LLVM_DEBUG(VEC_DBGS() << "Inferred iteration size '" << dimSize - << (dimScalable ? " x vscale" : "") - << "' for dimension '" << dim << "'\n"); - } - - if (opResult) { - assert(!llvm::is_contained(result.vectorScalableFlags, true) && - "inferring scalable bounds with `opResult` not supported!"); - result.destShape = linalgOp.getIndexingMapMatchingResult(opResult.value()) - .compose(result.vectorSizes); - } - - return result; -} - -/// Returns the result sizes and vector input sizes of the tensor.pack op. The -/// inferred bounding size is returned if it is dynamic shape. Returns -/// std::nullopt if the shape inference failed. 
-static std::optional -inferSizesFromIR(tensor::PackOp op) { - LLVM_DEBUG(VEC_DBGS() << "Inferring dest sizes for:\n" << op << "\n"); - - if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { - return !getConstantIntValue(v).has_value(); - })) { - LLVM_DEBUG(VEC_DBGS() << "skip, because inner_tiles are not all constant"); - return std::nullopt; - } - - VectorizationTileSizes result; - std::optional inferred = - inferSizesFromIR(op.getSource()); - if (!inferred) { - return std::nullopt; - } - result.vectorSizes = inferred.value().destShape; - - for (auto [dimPos, tileSize] : - llvm::zip_equal(op.getInnerDimsPos(), op.getStaticInnerTiles())) { - if (result.vectorSizes[dimPos] % tileSize != 0) { - return std::nullopt; - } - result.vectorSizes[dimPos] /= tileSize; - } - auto outerDimsPerm = op.getOuterDimsPerm(); - if (!outerDimsPerm.empty()) { - applyPermutationToVector(result.vectorSizes, outerDimsPerm); - } - - LLVM_DEBUG({ - VEC_DBGS() << "After adjustment with inner tiles and " - "outer_dims_perm:\n"; - for (auto [idx, val] : llvm::enumerate(result.vectorSizes)) { - llvm::dbgs() << "Dim #" << idx << ": " << val << "\n"; - } - }); - result.destShape = result.vectorSizes; - - return result; -} - -/// Returns the result sizes and vector input sizes of the tensor.unpack op. The -/// inferred bounding size is returned if it is dynamic shape. Returns -/// std::nullopt if the shape inference failed. 
-static std::optional -inferSizesFromIR(tensor::UnPackOp op) { - LLVM_DEBUG(VEC_DBGS() << "Inferring dest sizes for:\n" << op << "\n"); - - if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { - return !getConstantIntValue(v).has_value(); - })) { - LLVM_DEBUG( - VEC_DBGS() - << "failed on inference because inner_tiles are not all constant"); - return std::nullopt; - } - - VectorizationTileSizes result; - std::optional inferred = - inferSizesFromIR(op.getSource()); - if (!inferred) { - return std::nullopt; - } - result.vectorSizes = inferred.value().destShape; - - result.vectorSizes.resize(op.getDestType().getRank()); - auto outerDimsPerm = op.getOuterDimsPerm(); - if (!outerDimsPerm.empty()) { - applyPermutationToVector(result.vectorSizes, - invertPermutationVector(outerDimsPerm)); - } - for (auto [dimPos, tileSize] : - llvm::zip_equal(op.getInnerDimsPos(), op.getStaticInnerTiles())) { - result.vectorSizes[dimPos] *= tileSize; - } - - LLVM_DEBUG({ - VEC_DBGS() << "After adjustment with inner tiles and " - "outer_dims_perm:\n"; - for (auto [idx, val] : llvm::enumerate(result.vectorSizes)) { - llvm::dbgs() << "Dim #" << idx << ": " << val << "\n"; - } - }); - result.destShape = result.vectorSizes; - - return result; -} - -/// See the documentation in the above function declaration. -static std::optional inferSizesFromIR(Value val) { - std::optional result; - TypeSwitch(val.getDefiningOp()) - .Case( - [&](auto op) { result = inferSizesFromIR(op, cast(val)); }) - .Case([&](auto op) { result = inferSizesFromIR(op); }) - .Case([&](tensor::ExtractSliceOp op) { - // tensor::ExtractSliceOp is not vectorizable, so only `destShape` has - // the values. 
- result = VectorizationTileSizes(); - LLVM_DEBUG(VEC_DBGS() << "Inferring sizes for:\n" << op << "\n"); - int64_t destRank = op.getResult().getType().getRank(); - for (int dim = 0; dim < destRank; ++dim) { - LLVM_DEBUG(VEC_DBGS() << "Dim #" << dim << ": "); - FailureOr maybeDimBound = - ValueBoundsConstraintSet::computeConstantBound( - presburger::BoundType::UB, {op, dim}, - /*stopCondition=*/nullptr, /*closedUB=*/true); - if (failed(maybeDimBound)) { - LLVM_DEBUG(llvm::dbgs() << "failed\n"); - result = std::nullopt; - return; - } - LLVM_DEBUG(llvm::dbgs() << maybeDimBound.value() << "\n"); - result->destShape.push_back(maybeDimBound.value()); - } - }) - .Default([&](Operation *) {}); - return result; -} - // Returns the vector sizes from the local lowering config or try to infer them // from the tensor shapes and tiled loops in the IR. static std::optional diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp index ddc0b9a52070..e23fe37e90e5 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp @@ -1588,4 +1588,206 @@ bool isFullSlice(OffsetSizeAndStrideOpInterface sliceLoadStoreOp, sliceLoadStoreOp.getMixedStrides(), tensorType, dynamicDims); } +//===----------------------------------------------------------------------===// +// Utility functions for vector size inference for dynamic shapes +//===----------------------------------------------------------------------===// + +std::optional +inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { + LLVM_DEBUG({ + llvm::dbgs() << "Inferring sizes for:\n" << linalgOp; + if (opResult) { + llvm::dbgs() << " with OpResult.resultNumber=" + << opResult->getResultNumber(); + } + llvm::dbgs() << '\n'; + }); + + std::optional vscaleRange; + if (!opResult) { + // Note: Inferring scalable sizes is not supported is `opResult` is set + // (which is used to compute sizes for 
tensor.pack/unpack). + auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(linalgOp); + vscaleRange = getDefaultVscaleRange(targetAttr); + } + + VectorizationTileSizes result; + unsigned numDims = linalgOp.getNumLoops(); + for (int dim = 0; dim < numDims; ++dim) { + // Map dimension `dim` to an operand dimension that we will use to + // traverse the U-D chain to get `dim` vector size information. + SmallVector> operandDimPairs; + linalgOp.mapIterationSpaceDimToAllOperandDims(dim, operandDimPairs); + if (operandDimPairs.empty()) { + return std::nullopt; + } + + Value firstOperand = operandDimPairs[0].first; + unsigned firstOperandDim = operandDimPairs[0].second; + + // Trivial case: `dim` size is available in the operand type. + int64_t dimSize = llvm::cast(firstOperand.getType()) + .getShape()[firstOperandDim]; + bool dimScalable = false; + if (!ShapedType::isDynamic(dimSize)) { + result.vectorSizes.push_back(dimSize); + result.vectorScalableFlags.push_back(dimScalable); + LLVM_DEBUG(llvm::dbgs() << "Inferred iteration size '" << dimSize + << "' for dimension '" << dim << "'\n"); + continue; + } + + // Use ValueBounds analysis to infer `dim` size upper bound. + FailureOr maybeDimBound; + for (auto operandDimPair : operandDimPairs) { + Value operand = operandDimPair.first; + unsigned operandDim = operandDimPair.second; + maybeDimBound = computeDimUpperBound(operand, operandDim, vscaleRange, + RoundUpVscaleMultiple::Yes); + if (succeeded(maybeDimBound)) { + break; + } + } + + if (failed(maybeDimBound)) { + return std::nullopt; + } + + dimSize = maybeDimBound->baseSize; + dimScalable = maybeDimBound->scalable; + result.vectorSizes.push_back(dimSize); + result.vectorScalableFlags.push_back(dimScalable); + + LLVM_DEBUG(llvm::dbgs() << "Inferred iteration size '" << dimSize + << (dimScalable ? 
" x vscale" : "") + << "' for dimension '" << dim << "'\n"); + } + + if (opResult) { + assert(!llvm::is_contained(result.vectorScalableFlags, true) && + "inferring scalable bounds with `opResult` not supported!"); + result.destShape = linalgOp.getIndexingMapMatchingResult(opResult.value()) + .compose(result.vectorSizes); + } + + return result; +} + +std::optional inferSizesFromIR(tensor::PackOp op) { + LLVM_DEBUG(llvm::dbgs() << "Inferring dest sizes for:\n" << op << "\n"); + + if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { + return !getConstantIntValue(v).has_value(); + })) { + LLVM_DEBUG(llvm::dbgs() + << "skip, because inner_tiles are not all constant"); + return std::nullopt; + } + + VectorizationTileSizes result; + std::optional inferred = + inferSizesFromIR(op.getSource()); + if (!inferred) { + return std::nullopt; + } + result.vectorSizes = inferred.value().destShape; + + for (auto [dimPos, tileSize] : + llvm::zip_equal(op.getInnerDimsPos(), op.getStaticInnerTiles())) { + if (result.vectorSizes[dimPos] % tileSize != 0) { + return std::nullopt; + } + result.vectorSizes[dimPos] /= tileSize; + } + auto outerDimsPerm = op.getOuterDimsPerm(); + if (!outerDimsPerm.empty()) { + applyPermutationToVector(result.vectorSizes, outerDimsPerm); + } + + LLVM_DEBUG({ + llvm::dbgs() << "After adjustment with inner tiles and " + "outer_dims_perm:\n"; + for (auto [idx, val] : llvm::enumerate(result.vectorSizes)) { + llvm::dbgs() << "Dim #" << idx << ": " << val << "\n"; + } + }); + result.destShape = result.vectorSizes; + + return result; +} + +std::optional inferSizesFromIR(tensor::UnPackOp op) { + LLVM_DEBUG(llvm::dbgs() << "Inferring dest sizes for:\n" << op << "\n"); + + if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { + return !getConstantIntValue(v).has_value(); + })) { + LLVM_DEBUG( + llvm::dbgs() + << "failed on inference because inner_tiles are not all constant"); + return std::nullopt; + } + + VectorizationTileSizes result; + std::optional 
inferred = + inferSizesFromIR(op.getSource()); + if (!inferred) { + return std::nullopt; + } + result.vectorSizes = inferred.value().destShape; + + result.vectorSizes.resize(op.getDestType().getRank()); + auto outerDimsPerm = op.getOuterDimsPerm(); + if (!outerDimsPerm.empty()) { + applyPermutationToVector(result.vectorSizes, + invertPermutationVector(outerDimsPerm)); + } + for (auto [dimPos, tileSize] : + llvm::zip_equal(op.getInnerDimsPos(), op.getStaticInnerTiles())) { + result.vectorSizes[dimPos] *= tileSize; + } + + LLVM_DEBUG({ + llvm::dbgs() << "After adjustment with inner tiles and " + "outer_dims_perm:\n"; + for (auto [idx, val] : llvm::enumerate(result.vectorSizes)) { + llvm::dbgs() << "Dim #" << idx << ": " << val << "\n"; + } + }); + result.destShape = result.vectorSizes; + + return result; +} + +std::optional inferSizesFromIR(Value val) { + std::optional result; + TypeSwitch(val.getDefiningOp()) + .Case( + [&](auto op) { result = inferSizesFromIR(op, cast(val)); }) + .Case([&](auto op) { result = inferSizesFromIR(op); }) + .Case([&](tensor::ExtractSliceOp op) { + // tensor::ExtractSliceOp is not vectorizable, so only `destShape` has + // the values. 
+ result = VectorizationTileSizes(); + LLVM_DEBUG(llvm::dbgs() << "Inferring sizes for:\n" << op << "\n"); + int64_t destRank = op.getResult().getType().getRank(); + for (int dim = 0; dim < destRank; ++dim) { + LLVM_DEBUG(llvm::dbgs() << "Dim #" << dim << ": "); + FailureOr maybeDimBound = + ValueBoundsConstraintSet::computeConstantBound( + presburger::BoundType::UB, {op, dim}, + /*stopCondition=*/nullptr, /*closedUB=*/true); + if (failed(maybeDimBound)) { + LLVM_DEBUG(llvm::dbgs() << "failed\n"); + result = std::nullopt; + return; + } + LLVM_DEBUG(llvm::dbgs() << maybeDimBound.value() << "\n"); + result->destShape.push_back(maybeDimBound.value()); + } + }) + .Default([&](Operation *) {}); + return result; +} + } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.h b/compiler/src/iree/compiler/Codegen/Utils/Utils.h index ea3d06956a27..ea83c9f8de4d 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.h +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.h @@ -252,6 +252,37 @@ bool isFullSlice(OffsetSizeAndStrideOpInterface sliceLoadStoreOp, IREE::Flow::DispatchTensorType tensorType, ValueRange dynamicDims); +//===----------------------------------------------------------------------===// +// Utility functions for vector size inference for dynamic shapes +//===----------------------------------------------------------------------===// + +struct VectorizationTileSizes { + SmallVector destShape; + SmallVector vectorSizes; + SmallVector vectorScalableFlags; +}; + +/// Returns a VectorizationTileSizes which contains the inferred bounded result +/// shape and vector input sizes. This is useful to infer the sizes from a +/// chain. +std::optional inferSizesFromIR(Value val); + +/// Returns the result sizes and vector input sizes of the tensor.unpack op. The +/// inferred bounding size is returned if it is dynamic shape. Returns +/// std::nullopt if the shape inference failed. 
+std::optional inferSizesFromIR(tensor::UnPackOp op); + +/// Returns the result sizes and vector input sizes of the tensor.pack op. The +/// inferred bounding size is returned if it is dynamic shape. Returns +/// std::nullopt if the shape inference failed. +std::optional inferSizesFromIR(tensor::PackOp op); + +/// Tries to infer the vector sizes from an IR using ValueBounds analysis. If +/// `opResult` is provided, it stores the bounded result shapes to destShape. +/// Returns std::nullopt if vector sizes can't be inferred. +std::optional +inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult); + } // namespace mlir::iree_compiler #endif // IREE_COMPILER_CODEGEN_UTILS_UTILS_H_