Merge pull request #45 from JuliaReach/schillic/layers
Add more layer types
schillic authored May 25, 2024
2 parents c2f6420 + 216a8ac commit 8c0a614
Showing 17 changed files with 525 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Project.toml
@@ -3,12 +3,16 @@ uuid = "02ac4b2c-022a-44aa-84a5-ea45a5754bcc"
version = "0.2.2"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ReachabilityBase = "379f33d0-9447-4353-bd03-d664070e549f"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
LinearAlgebra = "<0.0.1, 1.6"
ReachabilityBase = "0.1.1 - 0.2"
Reexport = "0.2, 1"
Requires = "0.5, 1"
Statistics = "<0.0.1, 1.6"
julia = "1.6"
10 changes: 10 additions & 0 deletions docs/src/lib/Architecture.md
@@ -53,10 +53,20 @@ dim_out(::AbstractLayerOp)
ControllerFormats.Architecture.dim(::AbstractLayerOp)
```

#### More specific layer interfaces

```@docs
AbstractPoolingLayerOp
```

#### Implementation

```@docs
DenseLayerOp
ConvolutionalLayerOp
FlattenLayerOp
MaxPoolingLayerOp
MeanPoolingLayerOp
```

### Activation functions
9 changes: 8 additions & 1 deletion src/Architecture/Architecture.jl
@@ -6,15 +6,22 @@ Module containing data structures to represent controllers.
module Architecture

using Requires
using LinearAlgebra: dot
using Statistics: mean

import Base: size
export AbstractNeuralNetwork, FeedforwardNetwork,
AbstractLayerOp, DenseLayerOp,
AbstractLayerOp, DenseLayerOp, ConvolutionalLayerOp, FlattenLayerOp,
AbstractPoolingLayerOp, MaxPoolingLayerOp, MeanPoolingLayerOp,
layers, dim_in, dim_out,
ActivationFunction, Id, ReLU, Sigmoid, Tanh, LeakyReLU

include("ActivationFunction.jl")
include("LayerOps/AbstractLayerOp.jl")
include("LayerOps/DenseLayerOp.jl")
include("LayerOps/ConvolutionalLayerOp.jl")
include("LayerOps/FlattenLayerOp.jl")
include("LayerOps/PoolingLayerOp.jl")
include("NeuralNetworks/AbstractNeuralNetwork.jl")
include("NeuralNetworks/FeedforwardNetwork.jl")

154 changes: 154 additions & 0 deletions src/Architecture/LayerOps/ConvolutionalLayerOp.jl
@@ -0,0 +1,154 @@
"""
    ConvolutionalLayerOp{F, W, B} <: AbstractLayerOp

A convolutional layer operation is a series of filters, each of which computes a
small affine map followed by an activation function.

### Fields

- `weights`    -- vector with one weight matrix for each filter
- `bias`       -- vector with one bias value for each filter
- `activation` -- activation function

### Notes

Conversion from a `Flux.Conv` is supported.
"""
struct ConvolutionalLayerOp{F,W,B} <: AbstractLayerOp
weights::W
bias::B
activation::F

function ConvolutionalLayerOp(weights::W, bias::B, activation::F;
validate=Val(true)) where {F,W,B}
if validate isa Val{true} && !_isconsistent_ConvolutionalLayerOp(weights, bias)
throw(ArgumentError("inconsistent filter dimensions: weights " *
"($(length(weights))) and biases ($(length(bias)))"))
end

return new{F,W,B}(weights, bias, activation)
end
end

function _isconsistent_ConvolutionalLayerOp(weights, bias)
if length(weights) != length(bias)
return false
elseif length(bias) == 0
return false
end
@inbounds begin
s = size(first(weights))
if length(s) != 3 || s[1] == 0 || s[2] == 0 || s[3] == 0
return false
end
for e in weights
if size(e) != s
return false
end
end
end
return true
end
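For orientation, here is a small sketch of the constructor validation implemented above (not part of the commit; it assumes the exported names are available via `using ControllerFormats` and that `Id` is the exported identity activation):

```julia
using ControllerFormats

# one weight matrix but two bias values: the consistency check rejects this
try
    ConvolutionalLayerOp([ones(2, 2, 1)], [0.0, 1.0], Id())
catch err
    println(err)  # ArgumentError("inconsistent filter dimensions: weights (1) and biases (2)")
end

# passing `validate=Val(false)` skips the consistency check
ConvolutionalLayerOp([ones(2, 2, 1)], [0.0, 1.0], Id(); validate=Val(false))
```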

n_filters(L::ConvolutionalLayerOp) = length(L.bias)

kernel(L::ConvolutionalLayerOp) = @inbounds size(first(L.weights))

# application to a tensor
function (L::ConvolutionalLayerOp)(T)
s = size(T)
if length(s) != 3
throw(ArgumentError("a convolutional layer requires at least two dimensions, but got $s"))
end
p, q, r = kernel(L)
@inbounds begin
if p > s[1] || q > s[2] || r != s[3]
throw(ArgumentError("convolution with kernel size $(kernel(L)) " *
"does not apply to a tensor of dimension $s"))
end
d1 = s[1] - p + 1
d2 = s[2] - q + 1
end
t = n_filters(L)
s = (d1, d2, t)
O = similar(T, s)
@inbounds for f in 1:t
W = L.weights[f]
b = L.bias[f]
for k in 1:r
for j in 1:d2
for i in 1:d1
T′ = view(T, i:(i + p - 1), j:(j + q - 1), k)
O[i, j, f] = L.activation(dot(W, T′) + b)
end
end
end
end
return O
end

function Base.:(==)(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp)
return L1.weights == L2.weights &&
L1.bias == L2.bias &&
L1.activation == L2.activation
end

function Base.:isapprox(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp; atol::Real=0,
rtol=nothing)
if isnothing(rtol)
if iszero(atol)
N = @inbounds promote_type(eltype(first(L1.weights)), eltype(first(L2.weights)),
eltype(L1.bias), eltype(L2.bias))
rtol = Base.rtoldefault(N)
else
rtol = zero(atol)
end
end
return isapprox(L1.weights, L2.weights; atol=atol, rtol=rtol) &&
isapprox(L1.bias, L2.bias; atol=atol, rtol=rtol) &&
L1.activation == L2.activation
end

function Base.show(io::IO, L::ConvolutionalLayerOp)
str = "$(string(ConvolutionalLayerOp)) of $(n_filters(L)) filters with " *
"kernel size $(kernel(L)) and $(typeof(L.activation)) activation"
return print(io, str)
end

size(::ConvolutionalLayerOp) = (3, 3)

function load_Flux_convert_Conv_layer()
return quote
function Base.convert(::Type{ConvolutionalLayerOp}, layer::Flux.Conv)
if !all(isone, layer.stride)
throw(ArgumentError("stride $(layer.stride) != 1 is not supported")) # COV_EXCL_LINE
end
if !all(iszero, layer.pad)
throw(ArgumentError("pad $(layer.pad) != 0 is not supported")) # COV_EXCL_LINE
end
if !all(isone, layer.dilation)
throw(ArgumentError("dilation $(layer.dilation) != 1 is not supported")) # COV_EXCL_LINE
end
if !all(isone, layer.groups)
throw(ArgumentError("groups $(layer.groups) != 1 is not supported")) # COV_EXCL_LINE
end
act = get(activations_Flux, layer.σ, nothing)
if isnothing(act)
throw(ArgumentError("unsupported activation function $(layer.σ)")) # COV_EXCL_LINE
end
# Flux stores a 4D matrix instead of a vector of 3D matrices
weights = @inbounds [layer.weight[:, :, :, i] for i in 1:size(layer.weight, 4)]
return ConvolutionalLayerOp(weights, layer.bias, act)
end

function Base.convert(::Type{Flux.Conv}, layer::ConvolutionalLayerOp)
act = get(activations_Flux, layer.activation, nothing)
if isnothing(act)
throw(ArgumentError("unsupported activation function $(layer.activation)")) # COV_EXCL_LINE
end
# Flux stores a 4D matrix instead of a vector of 3D matrices
weights = cat(layer.weights...; dims=4)
return Flux.Conv(weights, layer.bias, act)
end
end
end
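A brief usage sketch of the layer defined in this file (not part of the diff; it assumes the exported names are available via `using ControllerFormats`). With a single 2×2×1 filter, a 3×3×1 input yields an output of size (3-2+1, 3-2+1, 1) = (2, 2, 1):

```julia
using ControllerFormats

# one 2×2×1 filter with all-ones weights, zero bias, and identity activation
L = ConvolutionalLayerOp([ones(2, 2, 1)], [0.0], Id())

T = reshape(Float64.(1:9), (3, 3, 1))  # 3×3×1 input tensor
O = L(T)                               # 2×2×1 output

# each entry is the sum over one 2×2 window of the input
O[:, :, 1] == [12.0 24.0; 16.0 28.0]   # true
```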
2 changes: 2 additions & 0 deletions src/Architecture/LayerOps/DenseLayerOp.jl
@@ -73,6 +73,8 @@ dim_in(L::DenseLayerOp) = size(L.weights, 2)

dim_out(L::DenseLayerOp) = length(L.bias)

size(::DenseLayerOp) = (1, 1)

function load_Flux_convert_Dense_layer()
return quote
function Base.convert(::Type{DenseLayerOp}, layer::Flux.Dense)
46 changes: 46 additions & 0 deletions src/Architecture/LayerOps/FlattenLayerOp.jl
@@ -0,0 +1,46 @@
"""
    FlattenLayerOp <: AbstractLayerOp

A flattening layer operation converts a multidimensional tensor into a vector.

### Notes

The implementation uses row-major ordering for consistency with the
machine-learning literature.

```jldoctest
julia> T = reshape([1, 3, 2, 4, 5, 7, 6, 8], (2, 2, 2))
2×2×2 Array{Int64, 3}:
[:, :, 1] =
 1  2
 3  4

[:, :, 2] =
 5  6
 7  8

julia> FlattenLayerOp()(T)
8-element Vector{Int64}:
 1
 2
 3
 4
 5
 6
 7
 8
```
"""
struct FlattenLayerOp <: AbstractLayerOp
end

# application to a vector (swap to row-major convention)
function (L::FlattenLayerOp)(T)
s = size(T)
if length(s) == 1
return vec(T)
end
return vec(permutedims(T, (2, 1, 3:length(s)...)))
end

size(::FlattenLayerOp) = (nothing, 1)
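To make the row-major remark concrete, compare with Julia's column-major `vec` on the tensor from the doctest above (a quick sketch, not part of the commit):

```julia
using ControllerFormats

T = reshape([1, 3, 2, 4, 5, 7, 6, 8], (2, 2, 2))

vec(T)               # column-major: [1, 3, 2, 4, 5, 7, 6, 8]
FlattenLayerOp()(T)  # row-major:    [1, 2, 3, 4, 5, 6, 7, 8]
```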
94 changes: 94 additions & 0 deletions src/Architecture/LayerOps/PoolingLayerOp.jl
@@ -0,0 +1,94 @@
"""
    AbstractPoolingLayerOp <: AbstractLayerOp

Abstract type for pooling layer operations.

### Notes

Pooling is an operation on a three-dimensional tensor that iterates over the
first two dimensions in a window and aggregates the values, thus reducing the
size of the first two dimensions in the output.

### Implementation

The following (unexported) functions should be implemented:

- `window(::AbstractPoolingLayerOp)`      -- return the pair ``(p, q)`` representing the window size
- `aggregation(::AbstractPoolingLayerOp)` -- return the aggregation function (applied to a tensor)
"""
abstract type AbstractPoolingLayerOp <: AbstractLayerOp end

for (type_name, normal_name, agg_function, agg_name) in
((:MaxPoolingLayerOp, "max", maximum, "maximum"),
(:MeanPoolingLayerOp, "mean", mean, "Statistics.mean"))
@eval begin
@doc """
    $($type_name) <: AbstractPoolingLayerOp

A $($normal_name)-pooling layer operation. The aggregation function is
`$($agg_name)`.

### Fields

- `p` -- horizontal window size
- `q` -- vertical window size
"""
struct $type_name <: AbstractPoolingLayerOp
p::Int
q::Int

function $type_name(p::Int, q::Int; validate=Val(true))
if validate isa Val{true} && (p <= 0 || q <= 0)
throw(ArgumentError("inconsistent window size ($p, $q)"))
end
return new(p, q)
end
end

window(L::$type_name) = (L.p, L.q)

aggregation(::$type_name) = $agg_function

function Base.:(==)(L1::$type_name, L2::$type_name)
return window(L1) == window(L2)
end

function Base.show(io::IO, L::$type_name)
str = "$(string($type_name)) for $($normal_name)-pooling of window " *
"size $(window(L))"
return print(io, str)
end
end
end

# application to a tensor
function (L::AbstractPoolingLayerOp)(T)
s = size(T)
if length(s) != 3
throw(ArgumentError("a pooling layer requires a three-dimensional input, but got $s"))
end
p, q = window(L)
@inbounds begin
if mod(s[1], p) != 0 || mod(s[2], q) != 0
throw(ArgumentError("pooling with window size ($p, $q) does " *
"not apply to a tensor of dimension $s"))
end
d1 = div(s[1], p)
d2 = div(s[2], q)
d3 = s[3]
end
s = (d1, d2, d3)
O = similar(T, s)
aggregate = aggregation(L)
@inbounds for k in 1:d3
for j in 1:d2
for i in 1:d1
cluster = view(T, ((i - 1) * p + 1):(i * p), ((j - 1) * q + 1):(j * q), k)
O[i, j, k] = aggregate(cluster)
end
end
end
return O
end

size(::AbstractPoolingLayerOp) = (3, 3)
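A brief usage sketch of the pooling layers defined above (not part of the commit; it assumes the exported names are available via `using ControllerFormats`):

```julia
using ControllerFormats

T = reshape(Float64.(1:16), (4, 4, 1))  # 4×4×1 input

# a 2×2 window halves the first two dimensions: output size (2, 2, 1)
MaxPoolingLayerOp(2, 2)(T)[:, :, 1]   # [6.0 14.0; 8.0 16.0]
MeanPoolingLayerOp(2, 2)(T)[:, :, 1]  # [3.5 11.5; 5.5 13.5]
```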
8 changes: 7 additions & 1 deletion src/Architecture/NeuralNetworks/FeedforwardNetwork.jl
@@ -32,14 +32,20 @@ end
function _first_inconsistent_layer(L)
prev = nothing
for (i, l) in enumerate(L)
if !isnothing(prev) && dim_in(l) != dim_out(prev)
if !isnothing(prev) &&
((!isnothing(dim_in(l)) && !isnothing(dim_out(prev)) && dim_in(l) != dim_out(prev)) ||
!_iscompatible(size(prev), size(l)))
return i
end
prev = l
end
return 0
end

_iscompatible(t1::Tuple, t2::Tuple) = _iscompatible(t1[2], t2[1])
_iscompatible(i::Int, j::Int) = i == j
_iscompatible(i, ::Nothing) = true
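To see how the relaxed compatibility check is meant to be used, here is a hedged sketch chaining the new layer types with a dense layer by hand (not part of the commit; it assumes the constructors and the `DenseLayerOp` application behave as in the files above):

```julia
using ControllerFormats

conv    = ConvolutionalLayerOp([ones(2, 2, 1)], [0.0], ReLU())  # size (3, 3)
pool    = MaxPoolingLayerOp(2, 2)                               # size (3, 3)
flatten = FlattenLayerOp()                                      # size (nothing, 1)
dense   = DenseLayerOp(ones(1, 4), [0.0], Id())                 # size (1, 1)

T = ones(5, 5, 1)                  # 5×5×1 input
x = dense(flatten(pool(conv(T))))  # conv: 4×4×1 → pool: 2×2×1 → flatten: 4 → dense: 1
# x == [16.0]
```

With `_iscompatible` treating a `nothing` input dimension as a wildcard, a `FeedforwardNetwork` built from these four layers should also pass the consistency check above, provided `dim_in`/`dim_out` fall back to `nothing` for the tensor-shaped layers (not shown in this diff).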

layers(N::FeedforwardNetwork) = N.layers

function load_Flux_convert_network()
1 change: 1 addition & 0 deletions src/Architecture/init.jl
@@ -3,6 +3,7 @@ function __init__()
@require Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" begin
eval(load_Flux_activations())
eval(load_Flux_convert_Dense_layer())
eval(load_Flux_convert_Conv_layer())
eval(load_Flux_convert_network())
end
end
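With Flux loaded, the Requires hook above activates the conversions defined earlier. A hedged round-trip sketch (not part of the commit; it assumes `Flux.relu` is among the activations registered in `activations_Flux`):

```julia
using Flux, ControllerFormats

# a 2×2 convolution with 1 input and 1 output channel and ReLU activation;
# the defaults (stride 1, no padding, dilation 1, groups 1) are all supported above
c = Flux.Conv((2, 2), 1 => 1, Flux.relu)

L  = convert(ConvolutionalLayerOp, c)  # one 2×2×1 filter
c′ = convert(Flux.Conv, L)             # back to a Flux layer
```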