From 7088682c78f8d77ce0e87a8eac0422025cc446bb Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 2 May 2023 20:55:27 -0400 Subject: [PATCH] Remove greek-letter keyword from `normalise` (#2252) * use _greek_ascii_depwarn in normalise * also a better example * change use in LayerNorm --- src/layers/normalise.jl | 2 +- src/layers/stateless.jl | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index a37f1bd863..594ad4c161 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -208,7 +208,7 @@ function (a::LayerNorm)(x::AbstractArray) end end eps = convert(float(eltype(x)), a.ϵ) # avoids promotion for Float16 data, but should ε chage too? - a.diag(normalise(x, dims=1:length(a.size), ϵ=eps)) + a.diag(normalise(x; dims=1:length(a.size), eps)) end function Base.show(io::IO, l::LayerNorm) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index da125a4cea..884af4053b 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -1,34 +1,44 @@ """ - normalise(x; dims=ndims(x), ϵ=1e-5) + normalise(x; dims=ndims(x), eps=1e-5) Normalise `x` to mean 0 and standard deviation 1 across the dimension(s) given by `dims`. Per default, `dims` is the last dimension. -`ϵ` is a small additive factor added to the denominator for numerical stability. +`eps` is a small term added to the denominator for numerical stability. # Examples ```jldoctest julia> using Statistics -julia> x = [9, 10, 20, 60]; +julia> x = [90, 100, 110, 130, 70]; -julia> y = Flux.normalise(x); +julia> mean(x), std(x; corrected=false) +(100.0, 20.0) -julia> isapprox(std(y), 1, atol=0.2) && std(y) != std(x) +julia> y = Flux.normalise(x) +5-element Vector{Float64}: + -0.49999975000012503 + 0.0 + 0.49999975000012503 + 1.499999250000375 + -1.499999250000375 + +julia> isapprox(std(y; corrected=false), 1, atol=1e-5) true -julia> x = rand(1:100, 10, 2); +julia> x = rand(10:100, 10, 10); julia> y = Flux.normalise(x, dims=1); -julia> isapprox(std(y, dims=1), ones(1, 2), atol=0.2) && std(y, dims=1) != std(x, dims=1) +julia> isapprox(std(y; dims=1, corrected=false), ones(1, 10), atol=1e-5) true ``` """ -@inline function normalise(x::AbstractArray; dims=ndims(x), ϵ=ofeltype(x, 1e-5)) +@inline function normalise(x::AbstractArray; dims=ndims(x), eps=ofeltype(x, 1e-5), ϵ=nothing) + ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps") μ = mean(x, dims=dims) σ = std(x, dims=dims, mean=μ, corrected=false) - return @. (x - μ) / (σ + ϵ) + return @. (x - μ) / (σ + ε) end """