Merge pull request #45 from JuliaReach/schillic/layers
Add more layer types
schillic authored May 25, 2024
2 parents c2f6420 + 216a8ac commit 8c0a614
Showing 17 changed files with 525 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Project.toml
@@ -3,12 +3,16 @@ uuid = "02ac4b2c-022a-44aa-84a5-ea45a5754bcc"
version = "0.2.2"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ReachabilityBase = "379f33d0-9447-4353-bd03-d664070e549f"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
LinearAlgebra = "<0.0.1, 1.6"
ReachabilityBase = "0.1.1 - 0.2"
Reexport = "0.2, 1"
Requires = "0.5, 1"
Statistics = "<0.0.1, 1.6"
julia = "1.6"
10 changes: 10 additions & 0 deletions docs/src/lib/Architecture.md
@@ -53,10 +53,20 @@ dim_out(::AbstractLayerOp)
ControllerFormats.Architecture.dim(::AbstractLayerOp)
```

#### More specific layer interfaces

```@docs
AbstractPoolingLayerOp
```

#### Implementation

```@docs
DenseLayerOp
ConvolutionalLayerOp
FlattenLayerOp
MaxPoolingLayerOp
MeanPoolingLayerOp
```

### Activation functions
9 changes: 8 additions & 1 deletion src/Architecture/Architecture.jl
@@ -6,15 +6,22 @@ Module containing data structures to represent controllers.
module Architecture

using Requires
using LinearAlgebra: dot
using Statistics: mean

import Base: size
export AbstractNeuralNetwork, FeedforwardNetwork,
AbstractLayerOp, DenseLayerOp,
AbstractLayerOp, DenseLayerOp, ConvolutionalLayerOp, FlattenLayerOp,
AbstractPoolingLayerOp, MaxPoolingLayerOp, MeanPoolingLayerOp,
layers, dim_in, dim_out,
ActivationFunction, Id, ReLU, Sigmoid, Tanh, LeakyReLU

include("ActivationFunction.jl")
include("LayerOps/AbstractLayerOp.jl")
include("LayerOps/DenseLayerOp.jl")
include("LayerOps/ConvolutionalLayerOp.jl")
include("LayerOps/FlattenLayerOp.jl")
include("LayerOps/PoolingLayerOp.jl")
include("NeuralNetworks/AbstractNeuralNetwork.jl")
include("NeuralNetworks/FeedforwardNetwork.jl")

154 changes: 154 additions & 0 deletions src/Architecture/LayerOps/ConvolutionalLayerOp.jl
@@ -0,0 +1,154 @@
"""
    ConvolutionalLayerOp{F, W, B} <: AbstractLayerOp

A convolutional layer operation is a series of filters, each of which computes a
small affine map followed by an activation function.

### Fields

- `weights`    -- vector with one weight matrix for each filter
- `bias`       -- vector with one bias value for each filter
- `activation` -- activation function

### Notes

Conversion from a `Flux.Conv` is supported.
"""
struct ConvolutionalLayerOp{F,W,B} <: AbstractLayerOp
weights::W
bias::B
activation::F

function ConvolutionalLayerOp(weights::W, bias::B, activation::F;
validate=Val(true)) where {F,W,B}
if validate isa Val{true} && !_isconsistent_ConvolutionalLayerOp(weights, bias)
throw(ArgumentError("inconsistent filter dimensions: weights " *
"($(length(weights))) and biases ($(length(bias)))"))
end

return new{F,W,B}(weights, bias, activation)
end
end

function _isconsistent_ConvolutionalLayerOp(weights, bias)
if length(weights) != length(bias)
return false
elseif length(bias) == 0
return false
end
@inbounds begin
s = size(first(weights))
if length(s) != 3 || s[1] == 0 || s[2] == 0 || s[3] == 0
return false
end
for e in weights
if size(e) != s
return false
end
end
end
return true
end
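For orientation, here is a small sketch of the constructor validation implemented above (not part of the commit; it assumes the exported names are available via `using ControllerFormats` and that `Id` is the exported identity activation):

```julia
using ControllerFormats

# one weight matrix but two bias values: the consistency check rejects this
try
    ConvolutionalLayerOp([ones(2, 2, 1)], [0.0, 1.0], Id())
catch err
    println(err)  # ArgumentError("inconsistent filter dimensions: weights (1) and biases (2)")
end

# passing `validate=Val(false)` skips the consistency check
ConvolutionalLayerOp([ones(2, 2, 1)], [0.0, 1.0], Id(); validate=Val(false))
```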

n_filters(L::ConvolutionalLayerOp) = length(L.bias)

kernel(L::ConvolutionalLayerOp) = @inbounds size(first(L.weights))

# application to a tensor
function (L::ConvolutionalLayerOp)(T)
s = size(T)
if length(s) != 3
throw(ArgumentError("a convolutional layer requires at least two dimensions, but got $s"))
end
p, q, r = kernel(L)
@inbounds begin
if p > s[1] || q > s[2] || r != s[3]
throw(ArgumentError("convolution with kernel size $(kernel(L)) " *
"does not apply to a tensor of dimension $s"))
end
d1 = s[1] - p + 1
d2 = s[2] - q + 1
end
t = n_filters(L)
s = (d1, d2, t)
O = similar(T, s)
@inbounds for f in 1:t
W = L.weights[f]
b = L.bias[f]
for k in 1:r
for j in 1:d2
for i in 1:d1
T′ = view(T, i:(i + p - 1), j:(j + q - 1), k)
O[i, j, f] = L.activation(dot(W, T′) + b)
end
end
end
end
return O
end

function Base.:(==)(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp)
return L1.weights == L2.weights &&
L1.bias == L2.bias &&
L1.activation == L2.activation
end

function Base.:isapprox(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp; atol::Real=0,
rtol=nothing)
if isnothing(rtol)
if iszero(atol)
N = @inbounds promote_type(eltype(first(L1.weights)), eltype(first(L2.weights)),
eltype(L1.bias), eltype(L2.bias))
rtol = Base.rtoldefault(N)
else
rtol = zero(atol)
end
end
return isapprox(L1.weights, L2.weights; atol=atol, rtol=rtol) &&
isapprox(L1.bias, L2.bias; atol=atol, rtol=rtol) &&
L1.activation == L2.activation
end

function Base.show(io::IO, L::ConvolutionalLayerOp)
str = "$(string(ConvolutionalLayerOp)) of $(n_filters(L)) filters with " *
"kernel size $(kernel(L)) and $(typeof(L.activation)) activation"
return print(io, str)
end

size(::ConvolutionalLayerOp) = (3, 3)

function load_Flux_convert_Conv_layer()
return quote
function Base.convert(::Type{ConvolutionalLayerOp}, layer::Flux.Conv)
if !all(isone, layer.stride)
throw(ArgumentError("stride $(layer.stride) != 1 is not supported")) # COV_EXCL_LINE
end
if !all(iszero, layer.pad)
throw(ArgumentError("pad $(layer.pad) != 0 is not supported")) # COV_EXCL_LINE
end
if !all(isone, layer.dilation)
throw(ArgumentError("dilation $(layer.dilation) != 1 is not supported")) # COV_EXCL_LINE
end
if !all(isone, layer.groups)
throw(ArgumentError("groups $(layer.groups) != 1 is not supported")) # COV_EXCL_LINE
end
act = get(activations_Flux, layer.σ, nothing)
if isnothing(act)
throw(ArgumentError("unsupported activation function $(layer.σ)")) # COV_EXCL_LINE
end
# Flux stores a 4D matrix instead of a vector of 3D matrices
weights = @inbounds [layer.weight[:, :, :, i] for i in 1:size(layer.weight, 4)]
return ConvolutionalLayerOp(weights, layer.bias, act)
end

function Base.convert(::Type{Flux.Conv}, layer::ConvolutionalLayerOp)
act = get(activations_Flux, layer.activation, nothing)
if isnothing(act)
throw(ArgumentError("unsupported activation function $(layer.activation)")) # COV_EXCL_LINE
end
# Flux stores a 4D matrix instead of a vector of 3D matrices
weights = cat(layer.weights...; dims=4)
return Flux.Conv(weights, layer.bias, act)
end
end
end
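A brief usage sketch of the layer defined in this file (not part of the diff; it assumes the exported names are available via `using ControllerFormats`). With a single 2×2×1 filter, a 3×3×1 input yields an output of size (3-2+1, 3-2+1, 1) = (2, 2, 1):

```julia
using ControllerFormats

# one 2×2×1 filter with all-ones weights, zero bias, and identity activation
L = ConvolutionalLayerOp([ones(2, 2, 1)], [0.0], Id())

T = reshape(Float64.(1:9), (3, 3, 1))  # 3×3×1 input tensor
O = L(T)                               # 2×2×1 output

# each entry is the sum over one 2×2 window of the input
O[:, :, 1] == [12.0 24.0; 16.0 28.0]   # true
```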
2 changes: 2 additions & 0 deletions src/Architecture/LayerOps/DenseLayerOp.jl
@@ -73,6 +73,8 @@ dim_in(L::DenseLayerOp) = size(L.weights, 2)

dim_out(L::DenseLayerOp) = length(L.bias)

size(::DenseLayerOp) = (1, 1)

function load_Flux_convert_Dense_layer()
return quote
function Base.convert(::Type{DenseLayerOp}, layer::Flux.Dense)
46 changes: 46 additions & 0 deletions src/Architecture/LayerOps/FlattenLayerOp.jl
@@ -0,0 +1,46 @@
"""
    FlattenLayerOp <: AbstractLayerOp

A flattening layer operation converts a multidimensional tensor into a vector.

### Notes

The implementation uses row-major ordering for consistency with the
machine-learning literature.

```jldoctest
julia> T = reshape([1, 3, 2, 4, 5, 7, 6, 8], (2, 2, 2))
2×2×2 Array{Int64, 3}:
[:, :, 1] =
 1  2
 3  4

[:, :, 2] =
 5  6
 7  8

julia> FlattenLayerOp()(T)
8-element Vector{Int64}:
 1
 2
 3
 4
 5
 6
 7
 8
```
"""
struct FlattenLayerOp <: AbstractLayerOp
end

# application to a vector (swap to row-major convention)
function (L::FlattenLayerOp)(T)
s = size(T)
if length(s) == 1
return vec(T)
end
return vec(permutedims(T, (2, 1, 3:length(s)...)))
end

size(::FlattenLayerOp) = (nothing, 1)
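To make the row-major remark concrete, compare with Julia's column-major `vec` on the tensor from the doctest above (a quick sketch, not part of the commit):

```julia
using ControllerFormats

T = reshape([1, 3, 2, 4, 5, 7, 6, 8], (2, 2, 2))

vec(T)               # column-major: [1, 3, 2, 4, 5, 7, 6, 8]
FlattenLayerOp()(T)  # row-major:    [1, 2, 3, 4, 5, 6, 7, 8]
```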
94 changes: 94 additions & 0 deletions src/Architecture/LayerOps/PoolingLayerOp.jl
@@ -0,0 +1,94 @@
"""
    AbstractPoolingLayerOp <: AbstractLayerOp

Abstract type for pooling layer operations.

### Notes

Pooling is an operation on a three-dimensional tensor that iterates over the
first two dimensions in a window and aggregates the values, thus reducing the
size of the first two dimensions in the output.

### Implementation

The following (unexported) functions should be implemented:

- `window(::AbstractPoolingLayerOp)`      -- return the pair ``(p, q)`` representing the window size
- `aggregation(::AbstractPoolingLayerOp)` -- return the aggregation function (applied to a tensor)
"""
abstract type AbstractPoolingLayerOp <: AbstractLayerOp end

for (type_name, normal_name, agg_function, agg_name) in
((:MaxPoolingLayerOp, "max", maximum, "maximum"),
(:MeanPoolingLayerOp, "mean", mean, "Statistics.mean"))
@eval begin
@doc """
    $($type_name) <: AbstractPoolingLayerOp

A $($normal_name)-pooling layer operation. The aggregation function is
`$($agg_name)`.

### Fields

- `p` -- horizontal window size
- `q` -- vertical window size
"""
struct $type_name <: AbstractPoolingLayerOp
p::Int
q::Int

function $type_name(p::Int, q::Int; validate=Val(true))
if validate isa Val{true} && (p <= 0 || q <= 0)
throw(ArgumentError("inconsistent window size ($p, $q)"))
end
return new(p, q)
end
end

window(L::$type_name) = (L.p, L.q)

aggregation(::$type_name) = $agg_function

function Base.:(==)(L1::$type_name, L2::$type_name)
return window(L1) == window(L2)
end

function Base.show(io::IO, L::$type_name)
str = "$(string($type_name)) for $($normal_name)-pooling of window " *
"size $(window(L))"
return print(io, str)
end
end
end

# application to a tensor
function (L::AbstractPoolingLayerOp)(T)
s = size(T)
if length(s) != 3
throw(ArgumentError("a pooling layer requires a three-dimensional input, but got $s"))
end
p, q = window(L)
@inbounds begin
if mod(s[1], p) != 0 || mod(s[2], q) != 0
throw(ArgumentError("pooling with window size ($p, $q) does " *
"not apply to a tensor of dimension $s"))
end
d1 = div(s[1], p)
d2 = div(s[2], q)
d3 = s[3]
end
s = (d1, d2, d3)
O = similar(T, s)
aggregate = aggregation(L)
@inbounds for k in 1:d3
for j in 1:d2
for i in 1:d1
cluster = view(T, ((i - 1) * p + 1):(i * p), ((j - 1) * q + 1):(j * q), k)
O[i, j, k] = aggregate(cluster)
end
end
end
return O
end

size(::AbstractPoolingLayerOp) = (3, 3)
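A brief usage sketch of the pooling layers defined above (not part of the commit; it assumes the exported names are available via `using ControllerFormats`):

```julia
using ControllerFormats

T = reshape(Float64.(1:16), (4, 4, 1))  # 4×4×1 input

# a 2×2 window halves the first two dimensions: output size (2, 2, 1)
MaxPoolingLayerOp(2, 2)(T)[:, :, 1]   # [6.0 14.0; 8.0 16.0]
MeanPoolingLayerOp(2, 2)(T)[:, :, 1]  # [3.5 11.5; 5.5 13.5]
```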
8 changes: 7 additions & 1 deletion src/Architecture/NeuralNetworks/FeedforwardNetwork.jl
@@ -32,14 +32,20 @@ end
function _first_inconsistent_layer(L)
prev = nothing
for (i, l) in enumerate(L)
if !isnothing(prev) && dim_in(l) != dim_out(prev)
if !isnothing(prev) &&
((!isnothing(dim_in(l)) && !isnothing(dim_out(prev)) && dim_in(l) != dim_out(prev)) ||
!_iscompatible(size(prev), size(l)))
return i
end
prev = l
end
return 0
end

_iscompatible(t1::Tuple, t2::Tuple) = _iscompatible(t1[2], t2[1])
_iscompatible(i::Int, j::Int) = i == j
_iscompatible(i, ::Nothing) = true
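To see how the relaxed compatibility check is meant to be used, here is a hedged sketch chaining the new layer types with a dense layer by hand (not part of the commit; it assumes the constructors and the `DenseLayerOp` application behave as in the files above):

```julia
using ControllerFormats

conv    = ConvolutionalLayerOp([ones(2, 2, 1)], [0.0], ReLU())  # size (3, 3)
pool    = MaxPoolingLayerOp(2, 2)                               # size (3, 3)
flatten = FlattenLayerOp()                                      # size (nothing, 1)
dense   = DenseLayerOp(ones(1, 4), [0.0], Id())                 # size (1, 1)

T = ones(5, 5, 1)                  # 5×5×1 input
x = dense(flatten(pool(conv(T))))  # conv: 4×4×1 → pool: 2×2×1 → flatten: 4 → dense: 1
# x == [16.0]
```

With `_iscompatible` treating a `nothing` input dimension as a wildcard, a `FeedforwardNetwork` built from these four layers should also pass the consistency check above, provided `dim_in`/`dim_out` fall back to `nothing` for the tensor-shaped layers (not shown in this diff).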

layers(N::FeedforwardNetwork) = N.layers

function load_Flux_convert_network()
1 change: 1 addition & 0 deletions src/Architecture/init.jl
@@ -3,6 +3,7 @@ function __init__()
@require Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" begin
eval(load_Flux_activations())
eval(load_Flux_convert_Dense_layer())
eval(load_Flux_convert_Conv_layer())
eval(load_Flux_convert_network())
end
end
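With Flux loaded, the Requires hook above activates the conversions defined earlier. A hedged round-trip sketch (not part of the commit; it assumes `Flux.relu` is among the activations registered in `activations_Flux`):

```julia
using Flux, ControllerFormats

# a 2×2 convolution with 1 input and 1 output channel and ReLU activation;
# the defaults (stride 1, no padding, dilation 1, groups 1) are all supported above
c = Flux.Conv((2, 2), 1 => 1, Flux.relu)

L  = convert(ConvolutionalLayerOp, c)  # one 2×2×1 filter
c′ = convert(Flux.Conv, L)             # back to a Flux layer
```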