From f53923dda9232ed3a84f74f6ca7600826b627a28 Mon Sep 17 00:00:00 2001 From: Alan Li Date: Tue, 3 Dec 2024 09:49:29 +0000 Subject: [PATCH] [i1] Implement `packed_storage` layout encoding attribute * Make `packed_storage` a type of `iree_encoding` attribute, and make the type converters accept it. * `i1` tensors with `#iree_encoding.packed_storage` will be interpreted as a packed i1 type, the same as specifying `--iree-experimental-packed-i1-storage`. * `--iree-experimental-packed-i1-storage` is kept for testing purposes. We can drop this option once the frontend can emit `i1` tensors with the attribute. Signed-off-by: Alan Li --- .../compiler/Codegen/Common/EncodingUtils.cpp | 6 ++- .../Dialect/Encoding/IR/EncodingAttrs.cpp | 4 ++ .../Dialect/Encoding/IR/EncodingAttrs.td | 11 ++++ .../Dialect/Encoding/IR/EncodingTypes.h | 3 ++ .../Conversion/HALToStream/Patterns.cpp | 9 ++++ .../Stream/Transforms/EncodeTensors.cpp | 3 +- .../test/encode_host_tensors_packing_i1.mlir | 8 +-- .../compiler/Utils/ElementPackingUtils.cpp | 53 ++++++++++++++----- .../iree/compiler/Utils/ElementPackingUtils.h | 11 ++++ 9 files changed, 90 insertions(+), 18 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp index e3ca734a964b..fb77a2656705 100644 --- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Codegen/Common/EncodingUtils.h" #include "iree/compiler/Codegen/Dialect/Codegen/Utils/Utils.h" +#include "iree/compiler/Dialect/Encoding/IR/EncodingTypes.h" #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Utils/IndexingUtils.h" @@ -64,7 +65,10 @@ MaterializeEncodingConversionTarget::MaterializeEncodingConversionTarget( markUnknownOpDynamicallyLegal([](Operation *op) { auto typeHasEncoding = [](Type t) -> bool { auto 
tensorType = dyn_cast(t); - return tensorType && tensorType.getEncoding(); + if (!(tensorType && tensorType.getEncoding())) + return false; + // Allow iree_encoding::packed_storage to pass through. + return !IREE::Encoding::hasPackedStorageAttr(tensorType); }; auto valueHasEncoding = [=](Value v) -> bool { return typeHasEncoding(v.getType()); diff --git a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp index 593d9b8fc5c6..26d7d23d6d92 100644 --- a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp +++ b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp @@ -243,6 +243,10 @@ EncodingAttr getEncodingAttr(RankedTensorType type) { return dyn_cast_or_null(type.getEncoding()); } +bool hasPackedStorageAttr(RankedTensorType type) { + return dyn_cast_or_null(type.getEncoding()) != nullptr; +} + FailureOr getEncodingContractionDims(EncodingAttr encoding) { auto indexingMapsAttr = encoding.getUserIndexingMaps(); diff --git a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td index 54829b68e2cf..9b8996e11e5f 100644 --- a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td +++ b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.td @@ -41,6 +41,17 @@ def EncodingOpType : IREEEncoding_I32EnumAttr<"EncodingOpType", def EncodingOpTypeAttr: IREEEncoding_EnumAttr; + +def PackedStorageAttr : IREEEncoding_Attr<"PackedStorage"> { + let mnemonic = "packed_storage"; + let summary = [{Indicates packed storage datatype.}]; + let description = [{ + This attribute indicates this is a back-to-back packed storage in memory. + This attribute takes no arguments. 
+ }]; + let genVerifyDecl = 0; +} + def EncodingAttr : IREEEncoding_Attr<"Encoding", [ DeclareAttrInterfaceMethods getEncodingContractionDims(EncodingAttr encoding); diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp index 76eef8b8e56f..cf86856e395e 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp @@ -12,6 +12,10 @@ #include "iree/compiler/Dialect/Stream/IR/StreamOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" +namespace mlir::iree_compiler::IREE::Encoding { +bool hasPackedStorageAttr(mlir::RankedTensorType); +} // namespace mlir::iree_compiler::IREE::Encoding + namespace mlir::iree_compiler { namespace { @@ -90,6 +94,11 @@ struct ConvertTensorImportOp RankedTensorType tensorType, ValueRange dynamicDims, OpBuilder &builder) { + // If the encoding attr is about packed storage then we don't need all this + if (IREE::Encoding::hasPackedStorageAttr(tensorType)) { + return success(); + } + auto expectedElementType = builder.create( loc, tensorType.getElementType()); auto expectedEncodingType = builder.create( diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp index 5cb95099bbbc..42b359784d0c 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp @@ -46,7 +46,8 @@ static LogicalResult checkEncoding(Operation *op, RankedTensorType encodingType, ValueRange encodingDims, PatternRewriter &rewriter) { auto encoding = encodingType.getEncoding(); - if (encoding && !llvm::isa(encoding)) { + if (encoding && !llvm::isa(encoding)) { return rewriter.notifyMatchFailure(op, [=](Diagnostic &d) { d << "unsupported tensor encoding: " << 
encodingType; }); diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1.mlir index 12527ae139b1..eefc9810aed5 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing_i1.mlir @@ -1,7 +1,8 @@ -// RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors --iree-experimental-packed-i1-storage %s | FileCheck %s +// RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors %s | FileCheck %s +#packed = #iree_encoding.packed_storage func.func @unaligned_i1_size() -> index { - %0 = stream.tensor.sizeof tensor<12xi1> : index + %0 = stream.tensor.sizeof tensor<12xi1, #packed> : index return %0 : index } // CHECK: func @unaligned_i1_size() -> index { @@ -10,8 +11,9 @@ func.func @unaligned_i1_size() -> index { // ----- +#packed = #iree_encoding.packed_storage func.func @aligned_i1_size() -> index { - %0 = stream.tensor.sizeof tensor<24xi1> : index + %0 = stream.tensor.sizeof tensor<24xi1, #packed> : index return %0 : index } diff --git a/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp b/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp index 2f92ffa62913..2d53b8c47a38 100644 --- a/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp +++ b/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp @@ -15,17 +15,28 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/IR/BuiltinTypes.h" -namespace mlir::iree_compiler { - llvm::cl::opt clEnableI1Support( "iree-experimental-packed-i1-storage", llvm::cl::desc( - "Experimental feature: enable i1 data type support in codegen"), + "Experimental feature: force to use packed storage for i1 tensors." + "Turning on this option will see i1 tensors as if it has " + "#iree_encoding.packed_storage attribute." 
+ "This is to allow an alternative way to test the packed storage " + "feature before frontend can emit packed i1 tensors." + "This option can be dropped once the frontend can emit packed i1 " + "tensors."), llvm::cl::init(false)); +namespace mlir::iree_compiler { + bool needToPackSubByteElementBitWidth(unsigned bitWidth) { + return needToPackSubByteElementBitWidth( + bitWidth, /*isPackedStorage=*/clEnableI1Support); +} + +bool needToPackSubByteElementBitWidth(unsigned bitWidth, bool isPackedStorage) { // Enable i1 support if requested. - if (clEnableI1Support && bitWidth == 1) { + if (isPackedStorage && bitWidth == 1) { return true; } // Require the original bit width to be some power of two for now to avoid @@ -37,10 +48,20 @@ bool needToPackSubByteElementBitWidth(unsigned bitWidth) { bool needToPackSubByteElements(RankedTensorType shapedType) { unsigned bitWidth = IREE::Util::getTypeBitWidth(shapedType.getElementType()); - return needToPackSubByteElementBitWidth(bitWidth); + // Two paths to enable packed storage for i1 tensors: the attribute or cl + // option. The cl option will be dropped once frontend supports emitting + // tensors with attributes. + bool isPackedStorage = + IREE::Encoding::hasPackedStorageAttr(shapedType) || clEnableI1Support; + return needToPackSubByteElementBitWidth(bitWidth, isPackedStorage); } Type legalizeStorageElementType(Type elementType) { + return legalizeStorageElementType(elementType, + /*isPackedStorage=*/clEnableI1Support); +} + +Type legalizeStorageElementType(Type elementType, bool isPackedStorage) { // Only handle integers; floats in MLIR all have aligned widths (today). auto intType = dyn_cast(elementType); if (!intType) @@ -48,7 +69,7 @@ Type legalizeStorageElementType(Type elementType) { // For sub-byte elements, default to pack them into bytes. 
unsigned bitWidth = intType.getWidth(); - if (needToPackSubByteElementBitWidth(bitWidth)) + if (needToPackSubByteElementBitWidth(bitWidth, isPackedStorage)) return elementType; // Otherwise, extend them to the next power-of-two bit width. @@ -72,13 +93,16 @@ Value calculateStorageElementCountInBytes(Location loc, loc, builder, shapedType, dynamicDims); } + // TODO: remove cl options once frontend can emit packed i1 tensors. + bool isPackedStorage = + IREE::Encoding::hasPackedStorageAttr(shapedType) || clEnableI1Support; Type alignedElementType = - legalizeStorageElementType(shapedType.getElementType()); + legalizeStorageElementType(shapedType.getElementType(), isPackedStorage); unsigned elementBits = IREE::Util::getTypeBitWidth(alignedElementType); // Calculate all static dims first, if any. int64_t staticCount = 1; - if (!needToPackSubByteElementBitWidth(elementBits)) { + if (!needToPackSubByteElementBitWidth(elementBits, isPackedStorage)) { staticCount *= IREE::Util::getRoundedElementByteWidth(alignedElementType); } @@ -93,13 +117,13 @@ Value calculateStorageElementCountInBytes(Location loc, value = builder.createOrFold(loc, value, dim); } // Sub-byte packing requires putting multiple elements in the same byte. - if (needToPackSubByteElementBitWidth(elementBits)) { + if (needToPackSubByteElementBitWidth(elementBits, isPackedStorage)) { assert(8 % elementBits == 0); unsigned byteElements = 8 / elementBits; // TODO(antiagainst): We may want to emit runtime check to make sure this is // divisible. 
auto divisor = builder.create(loc, byteElements); - if (!clEnableI1Support && dynamicDims.empty() && + if (!isPackedStorage && dynamicDims.empty() && (staticCount * elementBits) % 8 != 0) { return nullptr; } @@ -113,12 +137,15 @@ Value calculateStorageElementOffsetInBytes(Location loc, RankedTensorType originalType, Value linearizedIndex, OpBuilder &builder) { - Type alignedElementType = - legalizeStorageElementType(originalType.getElementType()); + // TODO: remove cl options once frontend can emit packed i1 tensors. + bool isPackedStorage = + IREE::Encoding::hasPackedStorageAttr(originalType) || clEnableI1Support; + Type alignedElementType = legalizeStorageElementType( + originalType.getElementType(), isPackedStorage); unsigned elementBits = IREE::Util::getTypeBitWidth(alignedElementType); // Sub-byte packing requires putting multiple elements in the same byte. - if (needToPackSubByteElementBitWidth(elementBits)) { + if (needToPackSubByteElementBitWidth(elementBits, isPackedStorage)) { Value byteElements = builder.create(loc, 8 / elementBits); // TODO(antiagainst): We may want to emit runtime check to make sure this is diff --git a/compiler/src/iree/compiler/Utils/ElementPackingUtils.h b/compiler/src/iree/compiler/Utils/ElementPackingUtils.h index 6d5d18dcc718..5dfa37381d3d 100644 --- a/compiler/src/iree/compiler/Utils/ElementPackingUtils.h +++ b/compiler/src/iree/compiler/Utils/ElementPackingUtils.h @@ -15,7 +15,13 @@ namespace mlir::iree_compiler { /// Returns true if the given |bitWidth|, if appearing at runtime-kernel /// interface, is less than a byte that should be tightly packed together. +bool needToPackSubByteElementBitWidth(unsigned bitWidth, bool isPackedStorage); + +/// Temporary wrapper for the above function. `isPackedStorage` will be +/// determined by the cl option. This allows enabling packed storage for i1 +/// in both attribute and cl option ways. 
bool needToPackSubByteElementBitWidth(unsigned bitWidth); + /// Returns true if the given |shapedType|, if appearing at runtime-kernel /// interface, has sub-byte element types that should be tightly packed /// together. @@ -27,6 +33,11 @@ bool needToPackSubByteElements(RankedTensorType shapedType); /// runtime and kernel. For such cases, we perform tight packing for supported /// sub-byte elements, and expand to the next power-of-two bit width for other /// cases. +Type legalizeStorageElementType(Type elementType, bool isPackedStorage); + +/// Temporary wrapper for the above function. `isPackedStorage` will be +/// determined by the cl option. This allows enabling packed storage for i1 +/// in both attribute and cl option ways. Type legalizeStorageElementType(Type elementType); /// Emits IR with the given |builder| to calculate the total number of bytes