From 4c425d711ea5537b68b1434aebcbbda268649a9a Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 2 May 2023 09:42:21 -0400 Subject: [PATCH 1/3] use _greek_ascii_depwarn in normalise --- src/layers/stateless.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index da125a4cea..0f71f74b55 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -1,10 +1,10 @@ """ - normalise(x; dims=ndims(x), ϵ=1e-5) + normalise(x; dims=ndims(x), eps=1e-5) Normalise `x` to mean 0 and standard deviation 1 across the dimension(s) given by `dims`. Per default, `dims` is the last dimension. -`ϵ` is a small additive factor added to the denominator for numerical stability. +`eps` is a small term added to the denominator for numerical stability. # Examples ```jldoctest @@ -25,10 +25,11 @@ julia> isapprox(std(y, dims=1), ones(1, 2), atol=0.2) && std(y, dims=1) != std(x true ``` """ -@inline function normalise(x::AbstractArray; dims=ndims(x), ϵ=ofeltype(x, 1e-5)) +@inline function normalise(x::AbstractArray; dims=ndims(x), eps=ofeltype(x, 1e-5), ϵ=nothing) + ε = _greek_ascii_depwarn(ϵ => eps, :normalise, "ϵ" => "eps") μ = mean(x, dims=dims) σ = std(x, dims=dims, mean=μ, corrected=false) - return @. (x - μ) / (σ + ϵ) + return @. (x - μ) / (σ + ε) end """ From 44d8401f6cfd2243c5486ad084b8484fe2ef7a8d Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 2 May 2023 09:53:07 -0400 Subject: [PATCH 2/3] also a better example --- src/layers/stateless.jl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 0f71f74b55..884af4053b 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -10,18 +10,27 @@ Per default, `dims` is the last dimension.
```jldoctest julia> using Statistics -julia> x = [9, 10, 20, 60]; +julia> x = [90, 100, 110, 130, 70]; -julia> y = Flux.normalise(x); +julia> mean(x), std(x; corrected=false) +(100.0, 20.0) -julia> isapprox(std(y), 1, atol=0.2) && std(y) != std(x) +julia> y = Flux.normalise(x) +5-element Vector{Float64}: + -0.49999975000012503 + 0.0 + 0.49999975000012503 + 1.499999250000375 + -1.499999250000375 + +julia> isapprox(std(y; corrected=false), 1, atol=1e-5) true -julia> x = rand(1:100, 10, 2); +julia> x = rand(10:100, 10, 10); julia> y = Flux.normalise(x, dims=1); -julia> isapprox(std(y, dims=1), ones(1, 2), atol=0.2) && std(y, dims=1) != std(x, dims=1) +julia> isapprox(std(y; dims=1, corrected=false), ones(1, 10), atol=1e-5) true ``` """ From 98f273e05d48631a661e3ed436278786aeb08895 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 2 May 2023 11:47:19 -0400 Subject: [PATCH 3/3] change use in LayerNorm --- src/layers/normalise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index a37f1bd863..594ad4c161 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -208,7 +208,7 @@ function (a::LayerNorm)(x::AbstractArray) end end eps = convert(float(eltype(x)), a.ϵ) # avoids promotion for Float16 data, but should ε chage too? - a.diag(normalise(x, dims=1:length(a.size), ϵ=eps)) + a.diag(normalise(x; dims=1:length(a.size), eps)) end function Base.show(io::IO, l::LayerNorm)