Merge pull request #261 from FluxML/cl/tests
fix hardsigmoid and use float(x) instead of x/1
CarloLucibello authored Jan 5, 2021
2 parents c08258b + 440ed3b commit 090a0ec
Showing 2 changed files with 41 additions and 35 deletions.
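The core change, visible throughout the diff below, is replacing the `x / 1` promotion trick with `float(x)` and routing literal constants through the new `oftf` helper. A minimal sketch of the idea (the REPL lines here are illustrative and not part of the diff):

```julia
# Both `x / 1` and `float(x)` promote an integer argument to a floating-point
# value, but `float(x)` states the intent directly and avoids a division.
x = 3
x / 1      # 3.0 — the old idiom
float(x)   # 3.0 — the new idiom

# The helper added in src/activations.jl converts a constant to the float type
# matching `x`, so a Float32 input keeps Float32 constants instead of widening:
oftf(x, y) = oftype(float(x), y)
oftf(1f0, 0.5)   # 0.5f0
oftf(1.0, 0.5)   # 0.5
```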
62 changes: 35 additions & 27 deletions src/activations.jl
@@ -17,7 +17,8 @@ end
# Aliases
export sigmoid, hardsigmoid, logsigmoid, thresholdrelu


# of type float
oftf(x, y) = oftype(float(x), y)

"""
σ(x) = 1 / (1 + exp(-x))
@@ -33,13 +34,14 @@ end
const sigmoid = σ

"""
hardσ(x, a=0.2) = max(0, min(1, a * x + 0.5))
hardσ(x) = max(0, min(1, (x + 3) / 6))
Segment-wise linear approximation of sigmoid.
See [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/abs/1511.00363).
Piecewise linear approximation of sigmoid.
"""
hardσ(x, a=0.2) = oftype(x/1, max(zero(x/1), min(one(x/1), oftype(x/1,a) * x + oftype(x/1,0.5))))

hardσ(x) = max(0, min(1, (x + 3) / 6))

# https://pytorch.org/docs/stable/generated/torch.nn.Hardsigmoid.html

const hardsigmoid = hardσ

"""
@@ -56,7 +58,7 @@ const logsigmoid = logσ
Segment-wise linear approximation of tanh. Cheaper and more computationally efficient version of tanh.
See [Large Scale Machine Learning](https://ronan.collobert.com/pub/matos/2004_phdthesis_lip6.pdf).
"""
hardtanh(x) = max(-one(x), min( one(x), x))
hardtanh(x) = max(-one(x), min(one(x), x))

"""
relu(x) = max(0, x)
@@ -73,7 +75,7 @@ Leaky [Rectified Linear Unit](https://en.wikipedia.org/wiki/Rectifier_(neural_ne
activation function.
You can also specify the coefficient explicitly, e.g. `leakyrelu(x, 0.01)`.
"""
leakyrelu(x, a = oftype(x/1, 0.01)) = max(a * x, x/1)
leakyrelu(x, a=oftf(x, 0.01)) = max(a * x, x)

"""
relu6(x) = min(max(0, x), 6)
@@ -93,8 +95,8 @@ Randomized Leaky [Rectified Linear Unit](https://arxiv.org/abs/1505.00853)
activation function.
You can also specify the bound explicitly, e.g. `rrelu(x, 0.0, 1.0)`.
"""
function rrelu(x, l = 1 / 8.0, u = 1 / 3.0)
a = oftype(x / 1, (u - l) * rand() + l)
function rrelu(x::T, l=1//8, u=1//3) where T<:Number
a = (u - l) * rand(float(T)) + l
return leakyrelu(x, a)
end

@@ -105,10 +107,9 @@ Exponential Linear Unit activation function.
See [Fast and Accurate Deep Network Learning by Exponential Linear Units](https://arxiv.org/abs/1511.07289).
You can also specify the coefficient explicitly, e.g. `elu(x, 1)`.
"""
elu(x, α=1) = ifelse(x ≥ 0, x/1, α * (exp(x) - 1))

deriv_elu(x, Ω, α=1) = ifelse(x ≥ 0, one(x), Ω + α)
elu(x, α=1) = ifelse(x ≥ 0, float(x), α * (exp(x) - 1))

deriv_elu(Ω, α=1) = ifelse(Ω ≥ 0, 1, Ω + α)

"""
gelu(x) = 0.5x * (1 + tanh(√(2/π) * (x + 0.044715x^3)))
@@ -117,11 +118,13 @@ deriv_elu(x, Ω, α=1) = ifelse(x ≥ 0, one(x), Ω + α)
activation function.
"""
function gelu(x)
λ = oftype(x / 1, √(2 / π))
α = oftype(x / 1, 0.044715)
α = oftf(x, 0.044715)
λ = oftf(x, gelu_λ)
x/2 * (1 + tanh(λ * (x + α * x^3)))
end

const gelu_λ = √(2 / π)

"""
swish(x) = x * σ(x)
@@ -148,15 +151,18 @@ Scaled exponential linear units.
See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515).
"""
function selu(x)
λ = oftype(x/1, 1.0507009873554804934193349852946)
α = oftype(x/1, 1.6732632423543772848170429916717)
λ * ifelse(x > 0, x/1, α * (exp(x) - 1))
λ = oftf(x, selu_λ)
α = oftf(x, selu_α)
λ * ifelse(x > 0, x, α * (exp(x) - 1))
end

const selu_λ = 1.0507009873554804934193349852946
const selu_α = 1.6732632423543772848170429916717

function deriv_selu(Ω)
λ = oftype(Ω/1, 1.0507009873554804934193349852946)
α = oftype(Ω/1, 1.6732632423543772848170429916717)
return ifelse(Ω > 0, λ, Ω + α*λ)
λ = oftf(Ω, selu_λ)
α = oftf(Ω, selu_α)
ifelse(Ω > 0, λ, Ω + α * λ)
end

"""
@@ -165,7 +171,7 @@ end
Continuously Differentiable Exponential Linear Units
See [Continuously Differentiable Exponential Linear Units](https://arxiv.org/abs/1704.07483).
"""
celu(x, α=1) = ifelse(x ≥ 0, x/1, α * (exp(x/α) - 1))
celu(x, α=1) = ifelse(x ≥ 0, float(x), α * (exp(x/α) - 1))

"""
trelu(x, theta=1) = x > theta ? x : 0
@@ -174,14 +180,15 @@ Threshold Gated Rectified Linear.
See [ThresholdRelu](https://arxiv.org/abs/1402.3337)
"""
trelu(x, theta=1) = ifelse(x > theta, x, zero(x))

const thresholdrelu = trelu

"""
softsign(x) = x / (1 + |x|)
See [Quadratic Polynomials Learn Better Image Features](http://www.iro.umontreal.ca/~lisa/publications2/index.php/attachments/single/205).
"""
softsign(x) = x / (one(x) + abs(x))
softsign(x) = x / (1 + abs(x))

"""
softplus(x) = log(exp(x) + 1)
Expand All @@ -195,8 +202,9 @@ softplus(x) = ifelse(x > 0, x + log1p(exp(-x)), log1p(exp(x)))
Return `log(cosh(x))` which is computed in a numerically stable way.
"""
logcosh(x) = x + softplus(-2x) - log(oftype(x, 2))
logcosh(x) = x + softplus(-2x) - oftf(x, log2)

const log2 = log(2)

"""
mish(x) = x * tanh(softplus(x))
@@ -219,7 +227,7 @@ tanhshrink(x) = x - tanh(x)
See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_Activation_Function).
"""
softshrink(x, λ = oftype(x/1, 0.5)) = min(max(zero(x), x - λ), x + λ)
softshrink(x, λ=oftf(x, 0.5)) = min(max(0, x - λ), x + λ)

# Provide an informative error message if activation functions are called with an array
for f in ACTIVATIONS
@@ -241,7 +249,7 @@ UNARY_ACTS = [ # f, df
(:hardtanh, :(-1 < x < 1)),
(:selu, :(deriv_selu(Ω))),
(:σ, :(conj(Ω * (1 - Ω)))),
(:elu, :(deriv_elu(x, Ω))),
(:elu, :(deriv_elu(Ω))),
]

for (f, df) in UNARY_ACTS
Expand All @@ -260,7 +268,7 @@ end


BINARY_ACTS = [ # f, df1, df2
(:elu, :(deriv_elu(x1, Ω, x2)), :(DoesNotExist())), # TODO use real deriv instead of DNE
(:elu, :(deriv_elu(Ω, x2)), :(DoesNotExist())), # TODO use real deriv instead of DNE
]

for (f, df1, df2) in BINARY_ACTS
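The main behavioural change in `src/activations.jl` is `hardσ` (alias `hardsigmoid`), which drops the old slope-0.2 approximation in favour of the PyTorch-style `(x + 3) / 6` form. A small comparison sketch (the `_old`/`_new` names are local to this example, not NNlib exports):

```julia
# Local sketch of the two definitions; only the second one ships after this commit.
hardσ_old(x, a=0.2) = max(0, min(1, a * x + 0.5))
hardσ_new(x)        = max(0, min(1, (x + 3) / 6))

hardσ_old(0.3)   # ≈ 0.56
hardσ_new(0.3)   # ≈ 0.55, matching torch.nn.Hardsigmoid
hardσ_new(-4.0)  # 0.0 — clamped for x ≤ -3
hardσ_new(4.0)   # 1.0 — clamped for x ≥ 3
```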
14 changes: 6 additions & 8 deletions test/activations.jl
@@ -6,7 +6,7 @@ ACTIVATION_FUNCTIONS =

function test_value_float_precision_preserving(a)
@testset "$(a): " begin
for T in [Float32, Float64]
for T in [Float16, Float32, Float64]
for val in [-10, -1, 0, 1, 10]
val = @inferred a(T(val))
@test typeof(val) == T
@@ -28,7 +28,7 @@ end

function test_gradient_float_precision_preserving(a)
@testset "$(a): " begin
for T in [Float32, Float64]
for T in [Float16, Float32, Float64]
for val in [-10, -1, 0, 1, 10]
val = @inferred a'(T(val))
@test typeof(val) == T
@@ -61,7 +61,7 @@ end
@test softshrink(0.0) == 0.0

@test sigmoid(1.0) == 1.0 / (1.0 + exp(-1.0))
@test hardsigmoid(1.0) == max(0,min(1,0.2*1.0 + 0.5))
@test hardsigmoid(1.0) == max(0,min(1, (1 + 3)/6))
@test hardtanh(1.0) == 1.0
@test relu(1.0) == 1.0
@test leakyrelu(1.0) == 1.0
@@ -82,7 +82,7 @@ end
@test softshrink(1.0) == 0.5

@test sigmoid(-1.0) == exp(-1.0) / (1.0 + exp(-1.0))
@test hardsigmoid(-1.0) == max(0,min(1,0.2*-1.0 + 0.5))
@test hardsigmoid(-1.0) == max(0,min(1,(-1+3)/6 ))
@test hardtanh(-1.0) == -1.0
@test relu(-1.0) == 0.0
@test leakyrelu(-1.0) == -0.01
@@ -189,9 +189,8 @@ end
@test logcosh(1_000.0) + log(2) == 1_000.0

@testset "hardsigmoid" begin
@test hardsigmoid(0.3) == 0.56
@test hardsigmoid(-0.3) == 0.44
@test hardsigmoid(0.1,0.5) == 0.55
@test hardsigmoid(0.3) == max(0,min(1,(0.3+3)/6))
@test hardsigmoid(-0.3) == max(0,min(1,(-0.3+3)/6))
for T in [:Float32, :Float64]
@eval @test hardsigmoid.($T[-100_000, 100_000.]) ≈ $T[0., 1.]
end
@@ -260,4 +259,3 @@ end
gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2)
gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2)
end
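The test changes above extend the type-preservation checks to `Float16` and restate the hardsigmoid expectations in terms of the new formula instead of hard-coded constants. A condensed, standalone sketch of that check (assuming NNlib at this commit is available):

```julia
# Every activation must return the same float type it was given, now including Float16.
using NNlib

for T in (Float16, Float32, Float64), v in (-10, -1, 0, 1, 10)
    y = hardsigmoid(T(v))
    @assert typeof(y) == T                       # precision is preserved
    @assert y == max(0, min(1, (T(v) + 3) / 6))  # matches the new piecewise formula
end
```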

2 comments on commit 090a0ec

@CarloLucibello
Member Author


@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/27372

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.7.10 -m "<description of version>" 090a0ecff43d4bfd87bb1e33015ef7eb3d0c12a9
git push origin v0.7.10
