diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index 6a9f4b777e..8f62bdb58a 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -26,7 +26,7 @@ jobs: - {user: Chemellia, repo: AtomicGraphNets.jl, group: All} - {user: SciML, repo: DiffEqFlux.jl, group: Layers} - {user: SciML, repo: NeuralPDE.jl, group: NNPDE} - + - {user: SciML, repo: OperatorLearning.jl, group: All} if: contains(github.event.pull_request.labels.*.name, 'run downstream test') steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8cc6bcf4a0..423682e0bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,10 +22,15 @@ jobs: - 'nightly' os: - ubuntu-latest - - macOS-latest - - windows-latest arch: - x64 + include: + - os: windows-latest + version: '1' + arch: x64 + - os: macOS-latest + version: '1' + arch: x64 steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/NEWS.md b/NEWS.md index 07852c2dde..a9db7cfa58 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # Flux Release Notes +## v0.13 +* After a deprecation cycle, the datasets in `Flux.Data` have been removed in favour of MLDatasets.jl. +* `params` is no longer exported, since it is a common name that is also exported by Distributions.jl; call it as `Flux.params` instead. +* `flatten` is no longer exported, due to a clash with `Iterators.flatten`. +* Juno.jl progress bar support has been removed, as it is now obsolete. +* `Dropout` now works with `Int` and `Complex` arrays and is twice-differentiable. + ## v0.12.10 * `Dropout`/`AlphaDropout` now supports [user-specified RNGs](https://github.com/FluxML/Flux.jl/pull/1838) diff --git a/Project.toml b/Project.toml index fdbbc21b7c..4165efce8e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,50 +1,38 @@ name = "Flux" uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c" -version = "0.12.9" +version = "0.13.0-DEV" [deps] -AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" -Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" -DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] -AbstractTrees = "0.3" Adapt = "3.0" ArrayInterface = "3.1, 4" CUDA = "3" ChainRulesCore = "1.12" -CodecZlib = "0.7" -Colors = "0.12" Functors = "0.2.1" MacroTools = "0.5" -NNlib = "0.8" +NNlib = "0.8.2" NNlibCUDA = "0.2" ProgressLogging = "0.1" Reexport = "0.2, 1.0" StatsBase = "0.33" -ZipFile = "0.9" Zygote = 
"0.6.34" julia = "1.6" diff --git a/docs/src/models/advanced.md b/docs/src/models/advanced.md index 6769706b82..d2e738362c 100644 --- a/docs/src/models/advanced.md +++ b/docs/src/models/advanced.md @@ -97,8 +97,8 @@ We can freeze a specific parameter of a specific layer which already entered a ` by simply deleting it from `ps`: ```julia -ps = params(m) -delete!(ps, m[2].bias) +ps = Flux.params(m) +delete!(ps, m[2].bias) ``` ## Custom multiple input or output layer diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md index c62d7d004e..3f8e57b166 100644 --- a/docs/src/models/basics.md +++ b/docs/src/models/basics.md @@ -39,7 +39,7 @@ julia> x = [2, 1]; julia> y = [2, 0]; -julia> gs = gradient(params(x, y)) do +julia> gs = gradient(Flux.params(x, y)) do f(x, y) end Grads(...) @@ -83,7 +83,7 @@ To improve the prediction we can take the gradients of the loss with respect to ```julia using Flux -gs = gradient(() -> loss(x, y), params(W, b)) +gs = gradient(() -> loss(x, y), Flux.params(W, b)) ``` Now that we have gradients, we can pull them out and update `W` to train the model. diff --git a/docs/src/models/recurrence.md b/docs/src/models/recurrence.md index 65a3cc7430..ba5f5ade0a 100644 --- a/docs/src/models/recurrence.md +++ b/docs/src/models/recurrence.md @@ -160,7 +160,7 @@ data = zip(X,Y) Flux.reset!(m) [m(x) for x in seq_init] -ps = params(m) +ps = Flux.params(m) opt= ADAM(1e-3) Flux.train!(loss, ps, data, opt) ``` diff --git a/docs/src/saving.md b/docs/src/saving.md index b1771cd5a0..9b1db909ce 100644 --- a/docs/src/saving.md +++ b/docs/src/saving.md @@ -62,7 +62,7 @@ julia> using Flux julia> model = Chain(Dense(10,5,relu),Dense(5,2),softmax) Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax) -julia> weights = params(model); +julia> weights = Flux.params(model); julia> using BSON: @save diff --git a/docs/src/training/optimisers.md b/docs/src/training/optimisers.md index 7f3ad6bf37..948c7a52ba 100644 --- a/docs/src/training/optimisers.md +++ b/docs/src/training/optimisers.md @@ -14,7 +14,7 @@ loss(x, y) = sum((predict(x) .- y).^2) x, y = rand(5), rand(2) # Dummy data l = loss(x, y) # ~ 3 -θ = params(W, b) +θ = Flux.params(W, b) grads = gradient(() -> loss(x, y), θ) ``` diff --git a/docs/src/training/training.md b/docs/src/training/training.md index 845a22d8a6..9db2330b65 100644 --- a/docs/src/training/training.md +++ b/docs/src/training/training.md @@ -64,7 +64,7 @@ At first glance it may seem strange that the model that we want to train is not ## Model parameters -The model to be trained must have a set of tracked parameters that are used to calculate the gradients of the objective function. In the [basics](../models/basics.md) section it is explained how to create models with such parameters. The second argument of the function `Flux.train!` must be an object containing those parameters, which can be obtained from a model `m` as `params(m)`. +The model to be trained must have a set of tracked parameters that are used to calculate the gradients of the objective function. In the [basics](../models/basics.md) section it is explained how to create models with such parameters. The second argument of the function `Flux.train!` must be an object containing those parameters, which can be obtained from a model `m` as `Flux.params(m)`. Such an object contains a reference to the model's parameters, not a copy, such that after their training, the model behaves according to their updated values. 
diff --git a/src/Flux.jl b/src/Flux.jl index 4909969cd1..3fd6fef5d0 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -11,13 +11,13 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd export gradient using ChainRulesCore -export Chain, Dense, Maxout, SkipConnection, Parallel, flatten, +export Chain, Dense, Maxout, SkipConnection, Parallel, RNN, LSTM, GRU, GRUv3, SamePad, Conv, CrossCor, ConvTranspose, DepthwiseConv, AdaptiveMaxPool, AdaptiveMeanPool, GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm, Upsample, PixelShuffle, - params, fmap, cpu, gpu, f32, f64, + fmap, cpu, gpu, f32, f64, testmode!, trainmode! include("optimise/Optimise.jl") diff --git a/src/data/Data.jl b/src/data/Data.jl index d00aeb709c..cb3a073969 100644 --- a/src/data/Data.jl +++ b/src/data/Data.jl @@ -6,62 +6,4 @@ using Base: @propagate_inbounds include("dataloader.jl") export DataLoader -## TODO for v0.13: remove everything below ############## -## Also remove the following deps: -## AbstractTrees, ZipFiles, CodecZLib - -import ..Flux -import SHA - -deprecation_message() = @warn("Flux's datasets are deprecated, please use the package MLDatasets.jl") - -function deps(path...) - if isnothing(@__DIR__) # sysimages - joinpath("deps", path...) - else - joinpath(@__DIR__, "..", "..", "deps", path...) - end -end - -function download_and_verify(url, path, hash) - tmppath = tempname() - download(url, tmppath) - hash_download = open(tmppath) do f - bytes2hex(SHA.sha256(f)) - end - if hash_download !== hash - msg = "Hash Mismatch!\n" - msg *= " Expected sha256: $hash\n" - msg *= " Calculated sha256: $hash_download" - error(msg) - end - mv(tmppath, path; force=true) -end - -function __init__() - mkpath(deps()) -end - -include("mnist.jl") -export MNIST - -include("fashion-mnist.jl") -export FashionMNIST - -include("cmudict.jl") -export CMUDict -using .CMUDict; export cmudict - -include("tree.jl") -include("sentiment.jl") -export Sentiment - -include("iris.jl") -export Iris - -include("housing.jl") -export Housing - -######################################### - end#module diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl deleted file mode 100644 index d096727c2a..0000000000 --- a/src/data/cmudict.jl +++ /dev/null @@ -1,77 +0,0 @@ -module CMUDict - -export cmudict - -using ..Data: deps, download_and_verify, deprecation_message - -const version = "0.7b" -const cache_prefix = "https://cache.julialang.org" - -function load() - suffixes_and_hashes = [("" , "209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4"), - (".phones" , "ffb588a5e55684723582c7256e1d2f9fadb130011392d9e59237c76e34c2cfd6"), - (".symbols", "408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027")] - if isdir(deps("cmudict")) - if all(isfile(deps("cmudict", "cmudict$x")) for (x, _) in suffixes_and_hashes) - return - end - end - @info "Downloading CMUDict dataset" - mkpath(deps("cmudict")) - for (x, hash) in suffixes_and_hashes - download_and_verify("$cache_prefix/https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x", - deps("cmudict", "cmudict$x"), hash) - end -end - -""" - phones() -Return a `Vector` containing the phones used in the CMU Pronouncing Dictionary. -""" -function phones() - deprecation_message() - load() - Symbol.(first.(split.(split(read(deps("cmudict", "cmudict.phones"),String), - "\n", keepempty = false), "\t"))) -end - -""" - symbols() -Return a `Vector` containing the symbols used in the CMU Pronouncing Dictionary. 
-A symbol is a phone with optional auxiliary symbols, indicating for example the -amount of stress on the phone. -""" -function symbols() - deprecation_message() - load() - Symbol.(split(read(deps("cmudict", "cmudict.symbols"),String), - "\n", keepempty = false)) -end - -""" - rawdict() -Return the unfiltered CMU Pronouncing Dictionary. -""" -function rawdict() - deprecation_message() - load() - Dict(String(xs[1]) => Symbol.(xs[2:end]) for xs in - filter(!isempty, split.(split(read(deps("cmudict", "cmudict"),String), "\n")))) -end - -validword(s) = isascii(s) && occursin(r"^[\w\-\.]+$", s) - -""" - cmudict() -Return a filtered CMU Pronouncing Dictionary. -It is filtered so each word contains only ASCII characters and a combination of -word characters (as determined by the regex engine using `\\w`), '-' and '.'. -""" -function cmudict() - deprecation_message() - filter(p -> validword(p.first), rawdict()) -end - -alphabet() = ['A':'Z'..., '0':'9'..., '_', '-', '.'] - -end \ No newline at end of file diff --git a/src/data/fashion-mnist.jl b/src/data/fashion-mnist.jl deleted file mode 100644 index 18999e9d5e..0000000000 --- a/src/data/fashion-mnist.jl +++ /dev/null @@ -1,67 +0,0 @@ -module FashionMNIST - -using ..MNIST: gzopen, imageheader, rawimage, labelheader, rawlabel -using ..Data: download_and_verify, deprecation_message - -const dir = if isnothing(@__DIR__) - joinpath("deps", "fashion-mnist") - else - joinpath(@__DIR__, "../../deps/fashion-mnist") -end - -function load() - mkpath(dir) - cd(dir) do - for (file, hash) in [("train-images-idx3-ubyte", "3aede38d61863908ad78613f6a32ed271626dd12800ba2636569512369268a84"), - ("train-labels-idx1-ubyte", "a04f17134ac03560a47e3764e11b92fc97de4d1bfaf8ba1a3aa29af54cc90845"), - ("t10k-images-idx3-ubyte" , "346e55b948d973a97e58d2351dde16a484bd415d4595297633bb08f03db6a073"), - ("t10k-labels-idx1-ubyte" , "67da17c76eaffca5446c3361aaab5c3cd6d1c2608764d35dfb1850b086bf8dd5")] - isfile(file) && continue - @info "Downloading Fashion-MNIST dataset" - download_and_verify("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/$file.gz", "$file.gz", hash) - open(file, "w") do io - write(io, gzopen(read, "$file.gz")) - end - end - end -end - -const TRAINIMAGES = joinpath(dir, "train-images-idx3-ubyte") -const TRAINLABELS = joinpath(dir, "train-labels-idx1-ubyte") -const TESTIMAGES = joinpath(dir, "t10k-images-idx3-ubyte") -const TESTLABELS = joinpath(dir, "t10k-labels-idx1-ubyte") - -""" - images() - images(:test) -Load the Fashion-MNIST images. -Each image is a 28×28 array of `Gray` colour values -(see [Colors.jl](https://github.com/JuliaGraphics/Colors.jl)). -Return the 60,000 training images by default; pass `:test` to retrieve the -10,000 test images. -""" -function images(set = :train) - deprecation_message() - load() - io = IOBuffer(read(set == :train ? TRAINIMAGES : TESTIMAGES)) - _, N, nrows, ncols = imageheader(io) - [rawimage(io) for _ in 1:N] -end - -""" - labels() - labels(:test) -Load the labels corresponding to each of the images returned from [`images()`](@ref). -Each label is a number from 0-9. -Return the 60,000 training labels by default; pass `:test` to retrieve the -10,000 test labels. -""" -function labels(set = :train) - deprecation_message() - load() - io = IOBuffer(read(set == :train ? 
TRAINLABELS : TESTLABELS)) - _, N = labelheader(io) - [rawlabel(io) for _ = 1:N] -end - -end diff --git a/src/data/housing.jl b/src/data/housing.jl deleted file mode 100644 index 4202f4d822..0000000000 --- a/src/data/housing.jl +++ /dev/null @@ -1,120 +0,0 @@ -""" -1. Title: Boston Housing Data -2. Sources: - (a) Origin: This dataset was taken from the StatLib library which is - maintained at Carnegie Mellon University. - (b) Creator: Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the - demand for clean air', J. Environ. Economics & Management, - vol.5, 81-102, 1978. - (c) Date: July 7, 1993 -3. Number of Instances: 506 -4. Number of Attributes: 13 continuous attributes (including "class" - attribute "MEDV"), 1 binary-valued attribute. -5. Attribute Information: - 1. CRIM per capita crime rate by town - 2. ZN proportion of residential land zoned for lots over - 25,000 sq.ft. - 3. INDUS proportion of non-retail business acres per town - 4. CHAS Charles River dummy variable (= 1 if tract bounds - river; 0 otherwise) - 5. NOX nitric oxides concentration (parts per 10 million) - 6. RM average number of rooms per dwelling - 7. AGE proportion of owner-occupied units built prior to 1940 - 8. DIS weighted distances to five Boston employment centres - 9. RAD index of accessibility to radial highways - 10. TAX full-value property-tax rate per 10,000 dollars - 11. PTRATIO pupil-teacher ratio by town - 12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks - by town - 13. LSTAT % lower status of the population - 14. MEDV Median value of owner-occupied homes in 1000's of dollars - Downloaded From: https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data -""" -module Housing - -using DelimitedFiles -using ..Data: deps, download_and_verify, deprecation_message - -#Uncomment if package exists -#const cache_prefix = "https://cache.julialang.org/" -const cache_prefix = "" - -function load() - isfile(deps("housing.data")) && return - - @info "Downloading the Boston housing Dataset" - download_and_verify("$(cache_prefix)http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", - deps("housing.data"), - "baadf72995725d76efe787b664e1f083388c79ba21ef9a7990d87f774184735a") - - #@info "Download complete. Working on the files" - path = deps() - isfile(deps("housing.data")) && touch(joinpath(path, "tempfile.data")) - open(joinpath(path, "tempfile.data"), "a") do fout - open(deps("housing.data"), "r") do fin - for line in eachline(fin) - line = replace(lstrip(line), r" +" => s",") - println(fout, line) - end - end - end - mv(joinpath(path, "tempfile.data"), deps("housing.data"), force=true) -end - -""" -Gets the targets for the Boston housing dataset, a 506 element array listing the targets for each example -```julia -julia> using Flux -julia> target = Flux.Data.Housing.targets() -julia> summary(target) -506×1 Array{Float64,2} -julia> target[1] -24.0 -""" -function targets() - deprecation_message() - load() - housing = readdlm(deps("housing.data"), ',') - reshape(Vector{Float64}(housing[1:end,end]), (506, 1)) -end - - -""" -Gets the names of the features provided in the dataset -""" -function feature_names() - ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat"] -end - - -""" -Gets the features of the Boston Housing Dataset. This is a 506x13 Matrix of Float64 datatypes. -The values are in the order ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat"]. -It has 506 examples. 
-```julia -julia> using Flux -julia> features = Flux.Data.Housing.features() -julia> summary(features) -506×13 Array{Float64,2} -julia> features[1, :] -13-element Array{Float64,1}: -0.00632 -18.0 -2.31 -0.0 -0.538 - ⋮ -296.0 -15.3 -396.9 -4.98 -""" -function features() - deprecation_message() - load() - housing = readdlm(deps("housing.data"), ',') - Matrix{Float64}(housing[1:end, 1:13]) -end - - -end \ No newline at end of file diff --git a/src/data/iris.jl b/src/data/iris.jl deleted file mode 100644 index 4529aa8a40..0000000000 --- a/src/data/iris.jl +++ /dev/null @@ -1,69 +0,0 @@ -""" -Fisher's classic iris dataset. -Measurements from 3 different species of iris: setosa, versicolor and -virginica. There are 50 examples of each species. -There are 4 measurements for each example: sepal length, sepal width, -petal length and petal width. The measurements are in centimeters. -The module retrieves the data from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris). -""" -module Iris - -using DelimitedFiles -using ..Data: deps, download_and_verify, deprecation_message - -# Uncomment if the iris.data file is cached to cache.julialang.org. -const cache_prefix = "https://cache.julialang.org/" - -function load() - isfile(deps("iris.data")) && return - - @info "Downloading iris dataset." - download_and_verify("$(cache_prefix)https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", - deps("iris.data"), - "6f608b71a7317216319b4d27b4d9bc84e6abd734eda7872b71a458569e2656c0") -end - -""" - labels() -Get the labels of the iris dataset, a 150 element array of strings listing the -species of each example. -```julia -julia> labels = Flux.Data.Iris.labels(); -julia> summary(labels) -"150-element Array{String,1}" -julia> labels[1] -"Iris-setosa" -``` -""" -function labels() - deprecation_message() - load() - iris = readdlm(deps("iris.data"), ',') - Vector{String}(iris[1:end, end]) -end - -""" - features() -Get the features of the iris dataset. This is a 4x150 matrix of Float64 -elements. It has a row for each feature (sepal length, sepal width, -petal length, petal width) and a column for each example. 
-```julia -julia> features = Flux.Data.Iris.features(); -julia> summary(features) -"4×150 Array{Float64,2}" -julia> features[:, 1] -4-element Array{Float64,1}: - 5.1 - 3.5 - 1.4 - 0.2 -``` -""" -function features() - deprecation_message() - load() - iris = readdlm(deps("iris.data"), ',') - Matrix{Float64}(iris[1:end, 1:4]') -end - -end diff --git a/src/data/mnist.jl b/src/data/mnist.jl deleted file mode 100644 index 45e51178ae..0000000000 --- a/src/data/mnist.jl +++ /dev/null @@ -1,117 +0,0 @@ -module MNIST - -using CodecZlib, Colors -using ..Data: download_and_verify, deprecation_message - -const Gray = Colors.Gray{Colors.N0f8} - -const dir = if isnothing(@__DIR__) - joinpath("deps", "mnist") - else - joinpath(@__DIR__, "../../deps/mnist") -end - -function gzopen(f, file) - open(file) do io - f(GzipDecompressorStream(io)) - end -end - -function load() - mkpath(dir) - cd(dir) do - for (file, hash) in [("train-images-idx3-ubyte", "440fcabf73cc546fa21475e81ea370265605f56be210a4024d2ca8f203523609"), - ("train-labels-idx1-ubyte", "3552534a0a558bbed6aed32b30c495cca23d567ec52cac8be1a0730e8010255c"), - ("t10k-images-idx3-ubyte" , "8d422c7b0a1c1c79245a5bcf07fe86e33eeafee792b84584aec276f5a2dbc4e6"), - ("t10k-labels-idx1-ubyte" , "f7ae60f92e00ec6debd23a6088c31dbd2371eca3ffa0defaefb259924204aec6")] - isfile(file) && continue - @info "Downloading MNIST dataset" - download_and_verify("https://cache.julialang.org/http://yann.lecun.com/exdb/mnist/$file.gz", "$file.gz", hash) - open(file, "w") do io - write(io, gzopen(read, "$file.gz")) - end - end - end -end - -const IMAGEOFFSET = 16 -const LABELOFFSET = 8 - -const NROWS = 28 -const NCOLS = 28 - -const TRAINIMAGES = joinpath(dir, "train-images-idx3-ubyte") -const TRAINLABELS = joinpath(dir, "train-labels-idx1-ubyte") -const TESTIMAGES = joinpath(dir, "t10k-images-idx3-ubyte") -const TESTLABELS = joinpath(dir, "t10k-labels-idx1-ubyte") - -function imageheader(io::IO) - magic_number = bswap(read(io, UInt32)) - total_items = bswap(read(io, UInt32)) - nrows = bswap(read(io, UInt32)) - ncols = bswap(read(io, UInt32)) - return magic_number, Int(total_items), Int(nrows), Int(ncols) -end - -function labelheader(io::IO) - magic_number = bswap(read(io, UInt32)) - total_items = bswap(read(io, UInt32)) - return magic_number, Int(total_items) -end - -function rawimage(io::IO) - img = Array{Gray}(undef, NCOLS, NROWS) - for i in 1:NCOLS, j in 1:NROWS - img[i, j] = reinterpret(Colors.N0f8, read(io, UInt8)) - end - return img -end - -function rawimage(io::IO, index::Integer) - seek(io, IMAGEOFFSET + NROWS * NCOLS * (index - 1)) - return rawimage(io) -end - -rawlabel(io::IO) = Int(read(io, UInt8)) - -function rawlabel(io::IO, index::Integer) - seek(io, LABELOFFSET + (index - 1)) - return rawlabel(io) -end - -getfeatures(io::IO, index::Integer) = vec(getimage(io, index)) - -""" - images() - images(:test) -Load the MNIST images. -Each image is a 28×28 array of `Gray` colour values -(see [Colors.jl](https://github.com/JuliaGraphics/Colors.jl)). -Return the 60,000 training images by default; pass `:test` to retrieve the -10,000 test images. -""" -function images(set = :train) - deprecation_message() - load() - io = IOBuffer(read(set == :train ? TRAINIMAGES : TESTIMAGES)) - _, N, nrows, ncols = imageheader(io) - [rawimage(io) for _ in 1:N] -end - -""" - labels() - labels(:test) -Load the labels corresponding to each of the images returned from [`images()`](@ref). -Each label is a number from 0-9. 
-Return the 60,000 training labels by default; pass `:test` to retrieve the -10,000 test labels. -""" -function labels(set = :train) - deprecation_message() - load() - io = IOBuffer(read(set == :train ? TRAINLABELS : TESTLABELS)) - _, N = labelheader(io) - [rawlabel(io) for _ = 1:N] -end - -end # module diff --git a/src/data/sentiment.jl b/src/data/sentiment.jl deleted file mode 100644 index aae8f70930..0000000000 --- a/src/data/sentiment.jl +++ /dev/null @@ -1,73 +0,0 @@ -"Stanford Sentiment Treebank dataset." -module Sentiment - -using ZipFile -using ..Data: deps, download_and_verify, deprecation_message - -function load() - isfile(deps("sentiment.zip")) && return - @info "Downloading sentiment treebank dataset" - download_and_verify("https://cache.julialang.org/https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip", - deps("sentiment.zip"), "5c613a4f673fc74097d523a2c83f38e0cc462984d847b82c7aaf36b01cbbbfcc") -end - -getfile(r, name) = r.files[findfirst(x -> x.name == name, r.files)] - -function getfile(name) - r = ZipFile.Reader(deps("sentiment.zip")) - text = read(getfile(r, "trees/$name"), String) - close(r) - return text -end - -using ..Data: Tree - -totree_(n, w) = Tree{Any}((parse(Int, n), w)) -totree_(n, a, b) = Tree{Any}((parse(Int, n), nothing), totree(a), totree(b)) -totree(t::Expr) = totree_(t.args...) - -function parsetree(s) - s = replace(s, "\\" => "") - s = replace(s, "\$" => "\\\$") - s = replace(s, r"[^ \n\(\)]+" => s -> "\"$s\"") - s = replace(s, " " => ", ") - return totree(Meta.parse(s)) -end - -function gettrees(name) - load() - ss = split(getfile("$name.txt"), '\n', keepempty = false) - return parsetree.(ss) -end - -""" - train() -Return the train split of the Stanford Sentiment Treebank. -The data is in [treebank](https://en.wikipedia.org/wiki/Treebank) format. -""" -function train() - deprecation_message() - gettrees("train") -end - -""" - test() -Return the test split of the Stanford Sentiment Treebank. -The data is in [treebank](https://en.wikipedia.org/wiki/Treebank) format. -""" -function test() - deprecation_message() - gettrees("test") -end - -""" - dev() -Return the dev split of the Stanford Sentiment Treebank. -The data is in [treebank](https://en.wikipedia.org/wiki/Treebank) format. -""" -function dev() - deprecation_message() - gettrees("dev") -end - -end \ No newline at end of file diff --git a/src/deprecations.jl b/src/deprecations.jl index e6e7360a22..e1921f4ca9 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -1,23 +1,4 @@ # v0.12 deprecations -@deprecate Dropout(p, dims) Dropout(p; dims=dims) -@deprecate InstanceNorm(λ, β, γ, μ, σ², ϵ, momentum, active=nothing) InstanceNorm(λ, β, γ, μ, σ², ϵ, momentum, true, true, active, length(β)) -@deprecate BatchNorm(λ, β, γ, μ, σ², ϵ, momentum, active=nothing) BatchNorm(λ, β, γ, μ, σ², ϵ, momentum, true, true, active, length(β)) -@deprecate GroupNorm(G, λ, β, γ, μ, σ², ϵ, momentum, active=nothing) GroupNorm(G, λ, β, γ, μ, σ², ϵ, momentum, true, true, active, length(β)) -@deprecate outdims(f, inputsize) outputsize(f, inputsize) -@deprecate Conv(; weight, bias, activation=identity, kws...) Conv(weight, bias, activation; kws...) -@deprecate ConvTranspose(; weight, bias, activation=identity, kws...) ConvTranspose(weight, bias, activation; kws...) -@deprecate DepthwiseConv(; weight, bias, activation=identity, kws...) DepthwiseConv(weight, bias, activation; kws...) 
- -function Base.getproperty(a::Dense, s::Symbol) - if s === :W - Base.depwarn("field name dense.W is deprecated in favour of dense.weight", :Dense) - return getfield(a, :weight) - elseif s === :b - Base.depwarn("field name dense.b is deprecated in favour of dense.bias", :Dense) - return getfield(a, :bias) - end - return getfield(a, s) -end function ones(dims...) Base.depwarn("Flux.ones(size...) is deprecated, please use Flux.ones32(size...) or Base.ones(Float32, size...)", :ones) @@ -34,7 +15,6 @@ zeros(T::Type, dims...) = Base.zeros(T, dims...) ones32(::Type, dims...) = throw(ArgumentError("Flux.ones32 is always Float32, use Base.ones to specify the element type")) zeros32(::Type, dims...) = throw(ArgumentError("Flux.zeros32 is always Float32, use Base.zeros to specify the element type")) - # v0.13 deprecations function Broadcast.broadcasted(f::Recur, args...) # This had an explicit @adjoint rule, calling Zygote.∇map(__context__, f, args...), until v0.12 diff --git a/src/functor.jl b/src/functor.jl index 4e76c924bd..b056ff9574 100644 --- a/src/functor.jl +++ b/src/functor.jl @@ -60,6 +60,8 @@ The behaviour of `params` on custom types can be customized using [`Functor.@fun # Examples ```jldoctest +julia> using Flux: params + julia> params(Chain(Dense(ones(2,3)), softmax)) # unpacks Flux models Params([[1.0 1.0 1.0; 1.0 1.0 1.0], [0.0, 0.0]]) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 42310d0b7c..3e22895e82 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -27,8 +27,12 @@ julia> m2 = Chain(enc = Chain(Flux.flatten, Dense(10, 5, tanh)), julia> m2(x) == (m2[:dec] ∘ m2[:enc])(x) true ``` + +For large models, there is a special type-unstable path which can reduce compilation +times. This can be used by supplying a vector of layers `Chain([layer1, layer2, ...])`. +This feature is somewhat experimental, beware! """ -struct Chain{T<:Union{Tuple, NamedTuple}} +struct Chain{T<:Union{Tuple, NamedTuple, AbstractVector}} layers::T end @@ -44,10 +48,22 @@ end @functor Chain -applychain(::Tuple{}, x) = x -applychain(fs::Tuple, x) = applychain(tail(fs), first(fs)(x)) +(c::Chain)(x) = applychain(c.layers, x) + +@generated function applychain(layers::Tuple{Vararg{<:Any,N}}, x) where {N} + symbols = vcat(:x, [gensym() for _ in 1:N]) + calls = [:($(symbols[i+1]) = layers[$i]($(symbols[i]))) for i in 1:N] + Expr(:block, calls...) 
+end -(c::Chain)(x) = applychain(Tuple(c.layers), x) +applychain(layers::NamedTuple, x) = applychain(Tuple(layers), x) + +function applychain(layers::AbstractVector, x) # type-unstable path, helps compile times + for f in layers + x = f(x) + end + x +end Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]) Base.getindex(c::Chain{<:NamedTuple}, i::AbstractArray) = @@ -60,6 +76,7 @@ function Base.show(io::IO, c::Chain) end _show_layers(io, layers::Tuple) = join(io, layers, ", ") _show_layers(io, layers::NamedTuple) = join(io, ["$k = $v" for (k, v) in pairs(layers)], ", ") +_show_layers(io, layers::AbstractVector) = (print(io, "["); join(io, layers, ", "); print(io, "]")) # This is a temporary and naive implementation # it might be replaced in the future for better performance @@ -132,24 +149,9 @@ struct Dense{F, M<:AbstractMatrix, B} end function Dense(in::Integer, out::Integer, σ = identity; - initW = nothing, initb = nothing, init = glorot_uniform, bias=true) - W = if initW !== nothing - Base.depwarn("keyword initW is deprecated, please use init (which similarly accepts a funtion like randn)", :Dense) - initW(out, in) - else - init(out, in) - end - - b = if bias === true && initb !== nothing - Base.depwarn("keyword initb is deprecated, please simply supply the bias vector, bias=initb(out)", :Dense) - initb(out) - else - bias - end - - return Dense(W, b, σ) + init = glorot_uniform, bias = true) + Dense(init(out, in), bias, σ) end @functor Dense @@ -188,21 +190,7 @@ struct Diagonal{T} β::T end -function Diagonal(sz::Integer...; initα = nothing, initβ = nothing) - α = if initα !== nothing - Base.depwarn("keyword initα is deprecated, please simply supply the desired vectors", :Diagonal) - initα(sz...) - else - ones32(sz...) - end - β = if initβ !== nothing - Base.depwarn("keyword initβ is deprecated, please simply supply the desired vectors", :Diagonal) - initβ(sz...) - else - zeros32(sz...) - end - Diagonal(α, β) -end +Diagonal(sz::Integer...) = Diagonal(ones32(sz...), zeros32(sz...)) @functor Diagonal diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 1cda764d0d..eb0ea8604e 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -31,7 +31,7 @@ end """ Conv(filter, in => out, σ = identity; - stride = 1, pad = 0, dilation = 1, groups = 1, [bias, weight, init]) + stride = 1, pad = 0, dilation = 1, groups = 1, [bias, init]) Standard convolutional layer. `filter` is a tuple of integers specifying the size of the convolutional kernel; @@ -61,11 +61,8 @@ Then: Keywords to control initialization of the layer: * `init` - Function used to generate initial weights. Defaults to `glorot_uniform`. -* `weight` - Initial weights of the layer. Typically an array, and can be used to override - other configurations. By default, these are generated using [`convfilter`](@ref). * `bias` - Initial bias is zero by default, this can be disabled entirely by setting it to - [`Flux.Zeros()`](@ref) or equivalently `false`, or another vector provided as - `bias = randn(Float32, out)`. + `false`, or another vector explicitly as `bias = randn(Float32, out)`. See also [`ConvTranspose`](@ref), [`DepthwiseConv`](@ref), [`CrossCor`](@ref). 
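With the `weight` keyword removed from these convolution constructors, a layer built from hand-chosen weights now goes through the array-based method, which this patch keeps. A minimal sketch of that path (the array shapes and values here are illustrative, not taken from the patch):

```julia
using Flux

w = randn(Float32, 3, 3, 4, 5)  # (kernel height, kernel width, in channels, out channels)
b = zeros(Float32, 5)
c = Conv(w, b, relu; stride = 1, pad = 1)  # array-based constructor, no `weight` keyword needed

size(c(randn(Float32, 28, 28, 4, 1)))  # (28, 28, 5, 1): a 3×3 kernel with pad 1 preserves spatial size
```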
@@ -121,7 +118,7 @@ Conv((3,), 4 => 5, σ) # 65 parameters julia> c1(randn(100, 4, 64)) |> size (98, 5, 64) -julia> params(c1) |> length +julia> Flux.params(c1) |> length 2 ``` """ @@ -136,8 +133,9 @@ end function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, groups = 1, - weight = convfilter(k, ch; init, groups), bias = true) where N - + bias = true) where N + + weight = convfilter(k, ch; init, groups) Conv(weight, bias, σ; stride, pad, dilation, groups) end @@ -250,10 +248,10 @@ end function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, groups = 1, - weight = convfilter(k, reverse(ch); init, groups), bias = true, ) where N + bias = true, ) where N + + weight = convfilter(k, reverse(ch); init, groups) ConvTranspose(weight, bias, σ; stride, pad, dilation, groups) end @@ -334,7 +332,7 @@ struct DepthwiseConv{N,M,F,A,V} end """ - DepthwiseConv(weight::AbstractArray, bias, [activation; stride, pad, dilation]) + DepthwiseConv(weight::AbstractArray, [bias, activation; stride, pad, dilation]) Constructs a layer with the given weight and bias arrays. Accepts the same keywords as the `DepthwiseConv((4,4), 3 => 6, relu)` method. @@ -350,8 +348,9 @@ end function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = depthwiseconvfilter(k, ch, init = init), bias = true) where N + bias = true) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" + weight = depthwiseconvfilter(k, ch, init = init) return DepthwiseConv(weight, bias, σ; stride, pad, dilation) end @@ -439,8 +438,9 @@ end function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convfilter(k, ch, init = init), bias = true) where N + bias = true) where N + weight = convfilter(k, ch, init = init) return CrossCor(weight, bias, σ; stride, pad, dilation) end diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 53cb391716..686140f5e1 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -43,12 +43,14 @@ dropout_mask(rng, x::CuArray, p; kwargs...) = throw(ArgumentError("x isa CuArray, but rng isa $(typeof(rng)). dropout_mask only support CUDA.RNG for CuArrays.")) dropout_mask(rng, x, p; kwargs...) = _dropout_mask(rng, x, p; kwargs...) function _dropout_mask(rng, x, p; dims=:) - y = rand!(rng, similar(x, _dropout_shape(x, dims))) + realfptype = float(real(eltype(x))) + y = rand!(rng, similar(x, realfptype, _dropout_shape(x, dims))) y .= _dropout_kernel.(y, p, 1 - p) return y end -ChainRulesCore.@non_differentiable dropout_mask(::Any, ::Any, ::Any) +# TODO move this to NNlib +Zygote.ChainRulesCore.@non_differentiable dropout_mask(rng, x, p) """ Dropout(p; dims=:, rng = rng_from_array()) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 14e3b8801e..9734990c1e 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -83,23 +83,10 @@ rnn.state = hidden(rnn.cell) reset!(m::Recur) = (m.state = m.cell.state0) reset!(m) = foreach(reset!, functor(m)[1]) - -# TODO remove in v0.13 -function Base.getproperty(m::Recur, sym::Symbol) - if sym === :init - Zygote.ignore() do - @warn "Recur field :init has been deprecated. To access initial state weights, use m::Recur.cell.state0 instead."
- end - return getfield(m.cell, :state0) - else - return getfield(m, sym) - end -end - flip(f, xs) = reverse(f.(reverse(xs))) function (m::Recur)(x::AbstractArray{T, 3}) where T - h = [m(view(x, :, :, i)) for i in 1:size(x, 3)] + h = [m(x_t) for x_t in eachslice(x, dims=3)] sze = size(h[1]) reshape(reduce(hcat, h), sze[1], sze[2], length(h)) end @@ -192,18 +179,6 @@ julia> r(rand(Float32, 3, 10)) |> size # batch size of 10 RNN(a...; ka...) = Recur(RNNCell(a...; ka...)) Recur(m::RNNCell) = Recur(m, m.state0) -# TODO remove in v0.13 -function Base.getproperty(m::RNNCell, sym::Symbol) - if sym === :h - Zygote.ignore() do - @warn "RNNCell field :h has been deprecated. Use m::RNNCell.state0 instead." - end - return getfield(m, :state0) - else - return getfield(m, sym) - end -end - # LSTM struct LSTMCell{A,V,S} @@ -272,23 +247,6 @@ julia> l(rand(Float32, 3, 10)) |> size # batch size of 10 LSTM(a...; ka...) = Recur(LSTMCell(a...; ka...)) Recur(m::LSTMCell) = Recur(m, m.state0) -# TODO remove in v0.13 -function Base.getproperty(m::LSTMCell, sym::Symbol) - if sym === :h - Zygote.ignore() do - @warn "LSTMCell field :h has been deprecated. Use m::LSTMCell.state0[1] instead." - end - return getfield(m, :state0)[1] - elseif sym === :c - Zygote.ignore() do - @warn "LSTMCell field :c has been deprecated. Use m::LSTMCell.state0[2] instead." - end - return getfield(m, :state0)[2] - else - return getfield(m, sym) - end -end - # GRU function _gru_output(gxs, ghs, bs) @@ -358,19 +316,6 @@ julia> g(rand(Float32, 3, 10)) |> size # batch size of 10 GRU(a...; ka...) = Recur(GRUCell(a...; ka...)) Recur(m::GRUCell) = Recur(m, m.state0) -# TODO remove in v0.13 -function Base.getproperty(m::GRUCell, sym::Symbol) - if sym === :h - Zygote.ignore() do - @warn "GRUCell field :h has been deprecated. Use m::GRUCell.state0 instead." - end - return getfield(m, :state0) - else - return getfield(m, sym) - end -end - - # GRU v3 struct GRUv3Cell{A,V,S} diff --git a/src/layers/show.jl b/src/layers/show.jl index 85faec3c59..a37af36065 100644 --- a/src/layers/show.jl +++ b/src/layers/show.jl @@ -14,11 +14,12 @@ for T in [ end function _big_show(io::IO, obj, indent::Int=0, name=nothing) + pre, post = obj isa Chain{<:AbstractVector} ? ("([", "])") : ("(", ")") children = _show_children(obj) if all(_show_leaflike, children) _layer_show(io, obj, indent, name) else - println(io, " "^indent, isnothing(name) ? "" : "$name = ", nameof(typeof(obj)), "(") + println(io, " "^indent, isnothing(name) ? "" : "$name = ", nameof(typeof(obj)), pre) if obj isa Chain{<:NamedTuple} && children == getfield(obj, :layers) # then we insert names -- can this be done more generically? for k in Base.keys(obj) @@ -35,10 +36,10 @@ function _big_show(io::IO, obj, indent::Int=0, name=nothing) end end if indent == 0 # i.e. 
this is the outermost container - print(io, ")") + print(io, rpad(post, 2)) _big_finale(io, obj) else - println(io, " "^indent, "),") + println(io, " "^indent, post, ",") end end end @@ -90,18 +91,18 @@ function _big_finale(io::IO, m) noncnt = _childarray_sum(_->1, m) - length(ps) if noncnt > 0 nonparam = underscorise(_childarray_sum(length, m) - sum(length, ps)) - printstyled(io, " "^09, "# Total: ", length(ps), " trainable arrays, "; color=:light_black) + printstyled(io, " "^08, "# Total: ", length(ps), " trainable arrays, "; color=:light_black) println(io, pars, " parameters,") printstyled(io, " "^10, "# plus ", noncnt, " non-trainable, ", nonparam, " parameters, summarysize "; color=:light_black) print(io, bytes, ".") else - printstyled(io, " "^19, "# Total: ", length(ps), " arrays, "; color=:light_black) + printstyled(io, " "^18, "# Total: ", length(ps), " arrays, "; color=:light_black) print(io, pars, " parameters, ", bytes, ".") end end end -_childarray_sum(f, x::AbstractArray) = f(x) +_childarray_sum(f, x::AbstractArray{<:Number}) = f(x) _childarray_sum(f, x) = isleaf(x) ? 0 : sum(y -> _childarray_sum(f, y), Functors.children(x)) # utility functions diff --git a/src/optimise/optimisers.jl b/src/optimise/optimisers.jl index 4f47c8d058..ec7844e256 100644 --- a/src/optimise/optimisers.jl +++ b/src/optimise/optimisers.jl @@ -23,7 +23,7 @@ opt = Descent() opt = Descent(0.3) -ps = params(model) +ps = Flux.params(model) gs = gradient(ps) do loss(x, y) @@ -500,7 +500,7 @@ opt = ADAMW(0.001, (0.89, 0.995), 0.1) ``` """ ADAMW(η = 0.001, β = (0.9, 0.999), decay = 0) = - Optimiser(ADAM(1, β), WeightDecay(decay), Descent(η)) + Optimiser(ADAM(η, β), WeightDecay(decay)) """ AdaBelief(η = 0.001, β::Tuple = (0.9, 0.999), ϵ = $EPS) diff --git a/test/layers/basic.jl b/test/layers/basic.jl index 968ddd506f..ca8e15a643 100644 --- a/test/layers/basic.jl +++ b/test/layers/basic.jl @@ -29,16 +29,18 @@ import Flux: activations @test m == fmap(identity, m) # does not forget names @test_throws ArgumentError Chain(layers = Dense(10, 10), two = identity) # reserved name + + @test_nowarn Chain([Dense(10, 5, σ), Dense(5, 2)])(randn(Float32, 10)) # vector of layers end @testset "Activations" begin c = Chain(Dense(3,5,relu), Dense(5,1,relu)) X = Float32.([1.0; 1.0; 1.0]) - @test_nowarn gradient(()->Flux.activations(c, X)[2][1], params(c)) + @test_nowarn gradient(()->Flux.activations(c, X)[2][1], Flux.params(c)) c2 = Chain(enc = c[1], dec = c[2]) @test Flux.activations(c, X) == Flux.activations(c2, X) - @test_nowarn gradient(()->Flux.activations(c2, X)[2][1], params(c2)) + @test_nowarn gradient(()->Flux.activations(c2, X)[2][1], Flux.params(c2)) end @testset "Dense" begin @@ -126,7 +128,7 @@ import Flux: activations @testset "params" begin mo = Maxout(()->Dense(32, 64), 4) - ps = params(mo) + ps = Flux.params(mo) @test length(ps) == 8 #4 alts, each with weight and bias end end @@ -239,7 +241,7 @@ import Flux: activations Parallel(f_cnt, sin)(1) @test CNT[] == 3 end - + # Ref https://github.com/FluxML/Flux.jl/issues/1673 @testset "Input domain" begin struct Input @@ -276,7 +278,7 @@ import Flux: activations vocab_size, embed_size = 10, 4 m = Flux.Embedding(vocab_size, embed_size) @test size(m.weight) == (embed_size, vocab_size) - + x = rand(1:vocab_size, 3) y = m(x) @test y isa Matrix{Float32} @@ -297,3 +299,41 @@ import Flux: activations @test_throws DimensionMismatch m(OneHotVector(3, 1000)) end end + +@testset "second derivatives" begin + m1 = Chain(Dense(3,4,tanh; bias=false), Dense(4,2)) + @test 
Zygote.hessian_dual(sum∘m1, [1,2,3]) ≈ Zygote.hessian_reverse(sum∘m1, [1,2,3]) + + m1v = Chain([m1[1], m1[2]]) # vector of layers + @test Zygote.hessian_dual(sum∘m1v, [1,2,3]) ≈ Zygote.hessian_dual(sum∘m1, [1,2,3]) + @test_broken Zygote.hessian_dual(sum∘m1v, [1,2,3]) ≈ Zygote.hessian_reverse(sum∘m1v, [1,2,3]) + + # NNlib's softmax gradient writes in-place + m2 = Chain(Dense(3,4,tanh), Dense(4,2), softmax) + @test_broken Zygote.hessian_dual(sum∘m2, [1,2,3]) ≈ Zygote.hessian_reverse(sum∘m2, [1,2,3]) + + # https://github.com/FluxML/NNlib.jl/issues/362 + m3 = Chain(Conv((3,), 2 => 3, relu), Dense(2,2)) + x3 = cat(Float32[1 2; 3 4; 5 6; 7 8]; dims=3) + @test Zygote.hessian_dual(sum∘m3, x3) ≈ Zygote.hessian_reverse(sum∘m3, x3) +end + +@testset "gradients of Chain{Vector}" begin + m1 = Chain(Dense(3,4,tanh; bias=false), Dense(4,2)) + m1v = Chain([m1[1], m1[2]]) + @test sum(length, params(m1)) == sum(length, params(m1v)) + + x1 = randn(Float32,3,5) + @test m1(x1) ≈ m1v(x1) + + y1 = rand(Bool,2,5) + g1 = gradient(() -> Flux.Losses.logitcrossentropy(m1(x1), y1), params(m1)) + g1v = gradient(() -> Flux.Losses.logitcrossentropy(m1v(x1), y1), params(m1v)) + @test g1[m1[1].weight] ≈ g1v[m1v[1].weight] + @test g1[m1[2].bias] ≈ g1v[m1v[2].bias] + + @test Flux.destructure(m1)[1] ≈ Flux.destructure(m1v)[1] + z1 = rand(22); + @test Flux.destructure(m1)[2](z1)[1].weight ≈ Flux.destructure(m1v)[2](z1)[1].weight + # Note that Flux.destructure(m1v)[2](z) has a Chain{Tuple}, as does m1v[1:2] +end diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 7730b2af20..9ce1a27aa0 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -58,7 +58,7 @@ end opt = Descent() for _ = 1:10^3 - gs = gradient(params(bias)) do + gs = gradient(Flux.params(bias)) do Flux.Losses.mse(bias(ip), op) end Flux.Optimise.update!(opt, params(bias), gs) @@ -160,7 +160,7 @@ end m = ConvTranspose((3,3), 1=>1) # Test that the gradient call does not throw: #900 - @test gradient(()->sum(m(x)), params(m)) isa Flux.Zygote.Grads + @test gradient(()->sum(m(x)), Flux.params(m)) isa Flux.Zygote.Grads x = zeros(Float32, 5, 5, 2, 4) m = ConvTranspose((3,3), 2=>3) diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index 9ab74e4a1d..7ae15aeff9 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -5,6 +5,11 @@ evalwgrad(f, x...) = pullback(f, x...)[1] @testset "Dropout" begin @testset for rng_kwargs in ((), (; rng = MersenneTwister())) + x = [1.0+0im,2.0+1im,3.0+3im] + @test x == Dropout(0.1; rng_kwargs...)(x) + @test x == evalwgrad(Dropout(0; rng_kwargs...), x) + @test zero(x) == evalwgrad(Dropout(1; rng_kwargs...), x) + x = [1.,2.,3.] 
@test x == Dropout(0.1; rng_kwargs...)(x) @test x == evalwgrad(Dropout(0; rng_kwargs...), x) @@ -121,7 +126,7 @@ end 2.0 4.0 6.0] @test Flux.hasaffine(m) == true - @test length(params(m)) == 2 + @test length(Flux.params(m)) == 2 @test m.β == [0, 0] # initβ(2) @test m.γ == [1, 1] # initγ(2) @@ -205,7 +210,7 @@ end let m = InstanceNorm(2; affine=true, track_stats=true), sizes = (3, 2, 2), x = reshape(collect(1:prod(sizes)), sizes) - @test length(params(m)) == 2 + @test length(Flux.params(m)) == 2 x = Float32.(x) @test m.β == [0, 0] # initβ(2) @test m.γ == [1, 1] # initγ(2) @@ -268,7 +273,7 @@ end x = reshape(collect(1:prod(sizes)), sizes) @test Flux.hasaffine(m) == true - @test length(params(m)) == 2 + @test length(Flux.params(m)) == 2 x = Float64.(x) y = m(x) μ = mean(x, dims=1) @@ -281,7 +286,7 @@ end let m = InstanceNorm(2, sigmoid), sizes = (3, 2, 2), x = reshape(collect(1:prod(sizes)), sizes) @test Flux.hasaffine(m) == false - @test length(params(m)) == 0 + @test length(Flux.params(m)) == 0 x = Float64.(x) y = m(x) @@ -348,10 +353,10 @@ end m = LayerNorm((2,3,4)) @test Flux.hasaffine(m) == true - @test length(params(m)) == 2 + @test length(Flux.params(m)) == 2 m = LayerNorm((2,3,4), affine=false) @test Flux.hasaffine(m) == false - @test length(params(m)) == 0 + @test length(Flux.params(m)) == 0 end @testset "GroupNorm" begin @@ -361,7 +366,7 @@ end let m = GroupNorm(4,2, track_stats=true), sizes = (3,4,2), x = reshape(collect(1:prod(sizes)), sizes) - @test length(params(m)) == 2 + @test length(Flux.params(m)) == 2 x = Float32.(x) @test m.β == [0, 0, 0, 0] # initβ(32) @test m.γ == [1, 1, 1, 1] # initγ(32) @@ -453,3 +458,8 @@ end @test BN(x) ≈ GN(x) end end + +@testset "second derivatives" begin + m1 = Dropout(0.5) + @test Zygote.hessian_reverse(sum∘m1, [1.0,2.0,3.0]) == zeros(3, 3) +end diff --git a/test/runtests.jl b/test/runtests.jl index a6abd609d2..706f126451 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,9 +1,11 @@ using Flux using Flux.Data using Flux: OneHotArray, OneHotMatrix, OneHotVector +using Flux: params using Test using Random, Statistics, LinearAlgebra using IterTools: ncycle +using Zygote using CUDA Random.seed!(0)
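Two usage sketches for the changes above, assuming only what the patch itself adds; all names and sizes are illustrative. First, since `params` is no longer exported (the documentation hunks qualify it, and `test/runtests.jl` now imports it explicitly), a basic training step looks like this:

```julia
using Flux  # `params` must be qualified as Flux.params, or imported via `using Flux: params`

model = Chain(Dense(10, 5, relu), Dense(5, 2))  # illustrative model
x, y = rand(Float32, 10), rand(Float32, 2)      # illustrative data
loss(x, y) = Flux.Losses.mse(model(x), y)

θ = Flux.params(model)              # collect the trainable arrays
gs = gradient(() -> loss(x, y), θ)  # Zygote.Grads keyed by those arrays
Flux.Optimise.update!(Descent(0.1), θ, gs)
```

Second, the vector-of-layers `Chain` added in `src/layers/basic.jl` and exercised by the new tests can be compared against the usual tuple path like this:

```julia
using Flux

layers = [Dense(10, 5, tanh), Dense(5, 2)]
m_tuple = Chain(layers...)  # type-stable path: layers stored in a Tuple
m_vec   = Chain(layers)     # new type-unstable path, cheaper to compile for large models

x = randn(Float32, 10, 8)
@assert m_tuple(x) ≈ m_vec(x)  # same layer objects, so identical forward pass

# gradients flow through the plain loop in applychain(::AbstractVector, x)
gs = gradient(() -> sum(m_vec(x)), Flux.params(m_vec))
```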