From 485d8d6be796e72b96b7baaba3ae4ec8183b5d96 Mon Sep 17 00:00:00 2001 From: willtebbutt Date: Tue, 1 Feb 2022 12:24:39 +0000 Subject: [PATCH] Tidy up for 0.8 (#228) * Rename and move * Remove redundant comments * Shuffle and remove dead code * Update NEWS * Bump minor version * Update news * Bump docs deps * Import things * Tweak docs builder * Improve test coverage * Temporary performance bug fix * stabilise getting_started example * Finish off extended_mauna_loa upgrade --- NEWS.md | 22 +++++ Project.toml | 9 +- docs/Project.toml | 2 +- docs/make.jl | 13 ++- docs/src/input_types.md | 2 +- docs/src/internals.md | 36 +++---- .../custom_affine_transformations/script.jl | 10 +- examples/differentiation/script.jl | 4 +- examples/extended_mauna_loa/script.jl | 8 ++ examples/getting_started/script.jl | 5 +- src/Stheno.jl | 75 ++++++--------- .../addition.jl | 6 +- .../compose.jl | 34 ++----- .../cross.jl | 68 +++++++------ .../product.jl | 4 +- src/composite/gradient.jl | 9 -- src/composite/integrate.jl | 21 ---- src/deprecate.jl | 7 -- ...aussian_process_probabilistic_programme.jl | 55 +++-------- src/gp/atomic_gp.jl | 37 ++++++++ .../composite_gp.jl => gp/derived_gp.jl} | 26 ++--- src/gp/gp.jl | 37 -------- src/{ => gp}/sparse_finite_gp.jl | 32 +++---- src/{abstract_gp.jl => gp/util.jl} | 3 - src/input_collection_types.jl | 95 +++++++++++++++++++ src/util/abstract_data_set.jl | 57 ----------- src/util/block_arrays.jl | 25 ----- src/util/covariance_matrices.jl | 23 ----- src/util/proper_type_piracy.jl | 3 - src/util/zygote_rules.jl | 28 ------ test/Project.toml | 2 + .../addition.jl | 14 +-- .../compose.jl | 41 +++++--- .../cross.jl | 48 +++++++--- .../integrate.jl | 0 .../product.jl | 16 ++-- .../test_util.jl | 0 ...aussian_process_probabilistic_programme.jl | 6 +- test/gp/{gp.jl => atomic_gp.jl} | 6 +- test/gp/derived_gp.jl | 5 + test/{ => gp}/sparse_finite_gp.jl | 9 +- test/{abstract_gp.jl => gp/util.jl} | 14 +-- ..._data_set.jl => input_collection_types.jl} | 25 
+++-- test/runtests.jl | 46 +++------ test/util/block_arrays.jl | 32 ------- test/util/covariance_matrices.jl | 32 ------- test/util/zygote_rules.jl | 24 ----- 47 files changed, 458 insertions(+), 618 deletions(-) rename src/{composite => affine_transformations}/addition.jl (93%) rename src/{composite => affine_transformations}/compose.jl (78%) rename src/{composite => affine_transformations}/cross.jl (53%) rename src/{composite => affine_transformations}/product.jl (95%) delete mode 100644 src/composite/gradient.jl delete mode 100644 src/composite/integrate.jl delete mode 100644 src/deprecate.jl create mode 100644 src/gp/atomic_gp.jl rename src/{composite/composite_gp.jl => gp/derived_gp.jl} (54%) delete mode 100644 src/gp/gp.jl rename src/{ => gp}/sparse_finite_gp.jl (57%) rename src/{abstract_gp.jl => gp/util.jl} (89%) create mode 100644 src/input_collection_types.jl delete mode 100644 src/util/abstract_data_set.jl delete mode 100644 src/util/block_arrays.jl delete mode 100644 src/util/covariance_matrices.jl delete mode 100644 src/util/proper_type_piracy.jl delete mode 100644 src/util/zygote_rules.jl rename test/{composite => affine_transformations}/addition.jl (89%) rename test/{composite => affine_transformations}/compose.jl (83%) rename test/{composite => affine_transformations}/cross.jl (66%) rename test/{composite => affine_transformations}/integrate.jl (100%) rename test/{composite => affine_transformations}/product.jl (92%) rename test/{composite => affine_transformations}/test_util.jl (100%) rename test/gp/{gp.jl => atomic_gp.jl} (90%) create mode 100644 test/gp/derived_gp.jl rename test/{ => gp}/sparse_finite_gp.jl (85%) rename test/{abstract_gp.jl => gp/util.jl} (86%) rename test/{util/abstract_data_set.jl => input_collection_types.jl} (68%) delete mode 100644 test/util/block_arrays.jl delete mode 100644 test/util/covariance_matrices.jl delete mode 100644 test/util/zygote_rules.jl diff --git a/NEWS.md b/NEWS.md index 80fd1e1d..07d804d4 100644 --- 
a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,28 @@ between versions, and discuss new features. If you find a breaking change this is not reported here, please either raise an issue or make a PR to ammend this document. +## 0.8.0 + +### Breaking Changes + +This version contains some re-naming, specifically +- `WrappedGP` -> `AtomicGP`, +- `wrap` -> `atomic`, and +- `CompositeGP` -> `DerivedGP`, +which better reflect what these types / functions represent in the context of a `GPPP`. +It's possible that you've never interacted with them, in which case there's nothing to +worry about. + +Lots of code has been moved around in order to better organise everything. + +A method of `vcat` has been added to build `BlockData` from `GPPPInput`s. This can make +your code look a bit nicer, so you might want to use it. + +Additionally, this package no longer depends explicitly upon Zygote or ZygoteRules. +Instead, all AD rules that are needed use ChainRulesCore directly. + +Deprecations mentioned in the 0.7 release have also been dropped. + ## 0.7.16 - Deprecate `approx_posterior` in favour of `posterior`. This is being removed because it has been removed in AbstractGPs in favour of `posterior`. It will be entirely removed in the next breaking release. - Remove some redundant testing infrastructure and tidy up the file structure slightly. 
diff --git a/Project.toml b/Project.toml index ec9b2c38..e19c87a0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,29 +1,22 @@ name = "Stheno" uuid = "8188c328-b5d6-583d-959b-9690869a5511" -version = "0.7.18" +version = "0.8.0" [deps] AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" -ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444" [compat] AbstractGPs = "0.4, 0.5" BlockArrays = "0.15, 0.16" ChainRulesCore = "1" -FillArrays = "0.7, 0.8, 0.9, 0.10, 0.11, 0.12" KernelFunctions = "0.9.6, 0.10" MacroTools = "0.4, 0.5" Reexport = "0.2, 1" -Zygote = "0.6" -ZygoteRules = "0.2" julia = "1.3" diff --git a/docs/Project.toml b/docs/Project.toml index ae424336..0e35812d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -6,4 +6,4 @@ Stheno = "8188c328-b5d6-583d-959b-9690869a5511" [compat] Documenter = "0.27" julia = "1.6" -Stheno = "0.7" +Stheno = "0.8" diff --git a/docs/make.jl b/docs/make.jl index 54606c1d..5b5bb66c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,3 +1,9 @@ +using Pkg +Pkg.develop(path=joinpath(@__DIR__, "..")) + +using Documenter +using Stheno + ### Process examples # Always rerun examples @@ -35,8 +41,9 @@ else mkpath(EXAMPLES_OUT) end +dev_command = "Pkg.develop(PackageSpec(; path=relpath(\"$(pkgdir(Stheno))\", pwd())));" -let script = "using Pkg; Pkg.activate(ARGS[1]); Pkg.instantiate()" +let script = "using Pkg; Pkg.activate(ARGS[1]); $dev_command Pkg.instantiate()" for example in example_locations if 
!success(`$(Base.julia_cmd()) -e $script $example`) error( @@ -68,10 +75,6 @@ isempty(processes) || success(processes) || error("some examples were not run su ### Build documentation -using Pkg -Pkg.develop(path=joinpath(@__DIR__, "..")) -using Documenter, Stheno - DocMeta.setdocmeta!( Stheno, :DocTestSetup, diff --git a/docs/src/input_types.md b/docs/src/input_types.md index 6b23c070..499b46b7 100644 --- a/docs/src/input_types.md +++ b/docs/src/input_types.md @@ -31,7 +31,7 @@ For example, this means that when handling multi-dimensional inputs stored in a When constructing a GP whose domain is the real-line, for example when using a GP to solve some kind of time-series problem, it is sufficient to work with `Vector{<:Real}`s of inputs. As such, the following is completely valid: ```julia using Stheno: GPC -f = wrap(GP(SqExponentialKernel()), GPC()) +f = atomic(GP(SqExponentialKernel()), GPC()) x = randn(10) f(x) ``` diff --git a/docs/src/internals.md b/docs/src/internals.md index 3eacbd2b..a175ceb9 100644 --- a/docs/src/internals.md +++ b/docs/src/internals.md @@ -1,9 +1,9 @@ # Interfaces The primary objects in Stheno are some special subtypes of `AbstractGP`. There are three primary concrete subtypes of `AbstractGP`: -- `WrappedGP`: an atomic Gaussian process given by wrapping an `AbstractGP`. -- `CompositeGP`: a Gaussian process composed of other `WrappedGP`s and `CompositeGP`s, whose properties are determined recursively from the GPs of which it is composed. -- `GaussianProcessProbabilisticProgramme` / `GPPP`: a Gaussian process comprising `WrappedGP`s and `CompositeGP`s. This is the primary piece of functionality that users should interact with. +- `AtomicGP`: an atomic Gaussian process given by wrapping an `AbstractGP`. +- `CompositeGP`: a Gaussian process composed of other `AtomicGP`s and `CompositeGP`s, whose properties are determined recursively from the GPs of which it is composed. 
+- `GaussianProcessProbabilisticProgramme` / `GPPP`: a Gaussian process comprising `AtomicGP`s and `CompositeGP`s. This is the primary piece of functionality that users should interact with. Each of these types implements the [Internal AbstractGPs API](https://github.com/JuliaGaussianProcesses/AbstractGPs.jl). @@ -13,7 +13,7 @@ This documentation provides the information necessary to understand the internal It is crucial for pseudo-point methods, and for the computation of marginal statistics at a reasonable scale, to be able to compute the diagonal of a given covariance matrix in linear time in the size of its inputs. This, in turn, necessitates that the diagonal of a given cross-covariance matrix can also be computed efficiently as the evaluation of covariance matrices often rely on the evaluation of cross-covariance matrices. -As such, we have the following functions in addition to the AbstractGPs API implemented for `WrappedGP`s and `CompositeGP`s: +As such, we have the following functions in addition to the AbstractGPs API implemented for `AtomicGP`s and `CompositeGP`s: | Function | Brief description | |:--------------------- |:---------------------- | @@ -24,18 +24,18 @@ As such, we have the following functions in addition to the AbstractGPs API impl The second and third rows of the table only make sense when `length(x) == length(x′)`, of course. -## WrappedGP +## AtomicGP -We can construct a `WrappedGP` in the following manner: +We can construct an `AtomicGP` in the following manner: ```julia -f = wrap(GP(m, k), gpc) +f = atomic(GP(m, k), gpc) ``` where `m` is its `MeanFunction`, `k` its `Kernel`. `gpc` is a `GPC` object that handles some book-keeping, and is discussed in more depth below. -The `AbstractGP` interface is implemented for `WrappedGP`s in terms of the `AbstractGP` that they wrap. -So if you want a particular behaviour, just make sure that the `AbstractGP` that you wrap has the functionality you want. 
+The `AbstractGP` interface is implemented for `AtomicGP`s in terms of the `AbstractGP` that they wrap. +So if you want a particular behaviour, just make sure that the `AbstractGP` that you wrap has the functionality you want. ### Aside: Example Kernel implementation @@ -55,16 +55,16 @@ You should write ```julia # THIS IS GOOD. PLEASE DO THIS gpc = GPC() -f = wrap(GP(mf, kf), gpc) -g = wrap(GP(mg, kg), gpc) +f = atomic(GP(mf, kf), gpc) +g = atomic(GP(mg, kg), gpc) h = f + g # THIS IS GOOD. PLEASE DO THIS ``` The rule is simple: when constructing GPs that you plan to make interact later in your program, construct them using the same `gpc` object. For example, DON'T do the following: ```julia # THIS IS BAD. PLEASE DON'T DO THIS -f = wrap(GP(mf, kf), GPC()) -g = wrap(GP(mg, kg), GPC()) +f = atomic(GP(mf, kf), GPC()) +g = atomic(GP(mg, kg), GPC()) h = f + g # THIS IS BAD. PLEASE DON'T DO THIS ``` @@ -75,7 +75,7 @@ The mistake here is to construct a separate `GPC` object for each `GP`. Hopefull ## CompositeGP -`CompositeGP`s are constructed as affine transformations of `CompositeGP`s and `WrappedGP`s. +`CompositeGP`s are constructed as affine transformations of `CompositeGP`s and `AtomicGP`s. We describe the implemented transformations below. You can add additional transformations -- see [Custom Affine Transformations](@ref) for an a worked example. @@ -129,7 +129,7 @@ This particular function isn't part of the user-facing API because it isn't gene ## GPPP The `GaussianProcessProbabilisticProgramme` is another `AbstractGP` which enables provides a -thin layer of convenience functionality on top of `WrappedGP`s and `CompositeGP`s, and is +thin layer of convenience functionality on top of `AtomicGP`s and `CompositeGP`s, and is the primary way in which it is intended that users will interact with this package. 
A `GPPP` like this @@ -140,11 +140,11 @@ f = @gppp let f3 = f1 + f2 end ``` -is equivalent to manually constructing a `GPPP` using `WrappedGP`s and `CompositeGP`s: +is equivalent to manually constructing a `GPPP` using `AtomicGP`s and `CompositeGP`s: ```julia gpc = GPC() -f1 = wrap(GP(SEKernel()), gpc) -f2 = wrap(GP(SEKernel()), gpc) +f1 = atomic(GP(SEKernel()), gpc) +f2 = atomic(GP(SEKernel()), gpc) f3 = f1 + f2 f = Stheno.GPPP((f1=f1, f2=f2, f3=f3), gpc) ``` diff --git a/examples/custom_affine_transformations/script.jl b/examples/custom_affine_transformations/script.jl index 4cea393e..6d0acc52 100644 --- a/examples/custom_affine_transformations/script.jl +++ b/examples/custom_affine_transformations/script.jl @@ -17,12 +17,12 @@ using Stheno # Suppose that, for some reason, you wish to implement the affine transformation of a single # process `f` given by `(Af)(x) = f(x) + f(x + 3) - 2`. # In order to define this transformation, first create a function which accepts `f` and -# returns a `CompositeGP`: -using Stheno: AbstractGP, CompositeGP, SthenoAbstractGP +# returns a `DerivedGP`: +using Stheno: AbstractGP, DerivedGP, SthenoAbstractGP -A(f::SthenoAbstractGP) = CompositeGP((A, f), f.gpc) +A(f::SthenoAbstractGP) = DerivedGP((A, f), f.gpc) -# The first argument to `CompositeGP` contains `A` itself and any data needed to fully +# The first argument to `DerivedGP` contains `A` itself and any data needed to fully # specify the process results from this transformation. In this case the only piece of # information required is `f`, but really any data can be put in this argument. # For example, if we wished to replace the translation of `-3` by a parameter, we could do @@ -48,7 +48,7 @@ const A_args = Tuple{typeof(A), SthenoAbstractGP}; Stheno.mean((A, f)::A_args, x::AbstractVector) = mean(f, x) .+ mean(f, x .+ 3) .- 2 # The first argument here is _always_ going to be precisely the tuple of arguments passed -# into the `CompositeGP` constructor above. 
+# into the `DerivedGP` constructor above. # You can assume that you can compute any statistics of `f` that the AbstractGPs API # provides. diff --git a/examples/differentiation/script.jl b/examples/differentiation/script.jl index 5d32bb00..d0eb005d 100644 --- a/examples/differentiation/script.jl +++ b/examples/differentiation/script.jl @@ -22,9 +22,9 @@ import AbstractGPs: mean, cov, var using AbstractGPs: AbstractGP using AbstractGPs.TestUtils: test_internal_abstractgps_interface -using Stheno: CompositeGP +using Stheno: DerivedGP -derivative(f::AbstractGP) = CompositeGP((derivative, f), f.gpc) +derivative(f::AbstractGP) = DerivedGP((derivative, f), f.gpc) const deriv_args = Tuple{typeof(derivative), AbstractGP} diff --git a/examples/extended_mauna_loa/script.jl b/examples/extended_mauna_loa/script.jl index 7d04c77b..e9ab15d5 100644 --- a/examples/extended_mauna_loa/script.jl +++ b/examples/extended_mauna_loa/script.jl @@ -209,6 +209,14 @@ function optimize_loss(loss, θ_init; optimizer=default_optimizer, maxiter=1_000 return unflatten(result.minimizer), result end +function KernelFunctions.kernelmatrix(k::ConstantKernel, x::AbstractVector) + return fill(only(k.c), length(x), length(x)) +end + +function KernelFunctions.kernelmatrix(k::ConstantKernel, x::AbstractVector, y::AbstractVector) + return fill(only(k.c), length(x), length(y)) +end + θ_opt, result = optimize_loss(nlml, init_params) # ## Plot the resulting model fit. diff --git a/examples/getting_started/script.jl b/examples/getting_started/script.jl index 5f83c400..7d3f72a0 100644 --- a/examples/getting_started/script.jl +++ b/examples/getting_started/script.jl @@ -129,8 +129,9 @@ using ParameterHandling l2 = positive(5.0), s2 = positive(1.0), - ## Observation noise variance -- we'll be learning this as well. - s_noise = positive(0.1), + ## Observation noise variance -- we'll be learning this as well. Constrained to be + ## at least 1e-3. 
+ s_noise = positive(0.1, exp, 1e-3), ) # We've used `ParameterHandling.jl`s `positive` constraint to ensure that all of the diff --git a/src/Stheno.jl b/src/Stheno.jl index 2318cb8b..4ad6c06d 100644 --- a/src/Stheno.jl +++ b/src/Stheno.jl @@ -1,68 +1,47 @@ module Stheno + # Users generally need access to the functionality from both of these packages. using Reexport + @reexport using AbstractGPs + @reexport using KernelFunctions - using AbstractGPs using BlockArrays using ChainRulesCore - using FillArrays - @reexport using KernelFunctions using LinearAlgebra using Random - using Zygote - using ZygoteRules import Base.Broadcast: broadcasted - using AbstractGPs: AbstractGP, FiniteGP, GP - import AbstractGPs: - mean, - cov, - var, - mean_and_cov, - mean_and_var, - rand, - logpdf, - elbo, - dtc, - posterior, - marginals + using AbstractGPs: AbstractGP, FiniteGP + import AbstractGPs: mean, cov, var using MacroTools: @capture, combinedef, postwalk, splitdef const AV{T} = AbstractVector{T} - # Various bits of utility that aren't inherently GP-related. Often very type-piratic. - include(joinpath("util", "zygote_rules.jl")) - include(joinpath("util", "covariance_matrices.jl")) - include(joinpath("util", "block_arrays.jl")) - include(joinpath("util", "abstract_data_set.jl")) - include(joinpath("util", "proper_type_piracy.jl")) - - # Supertype for GPs. - include("abstract_gp.jl") - - # Atomic GP objects. - include(joinpath("gp", "gp.jl")) - - # Composite GPs, constructed via affine transformation of CompositeGPs and GPs. - include(joinpath("composite", "composite_gp.jl")) - include(joinpath("composite", "cross.jl")) - include(joinpath("composite", "product.jl")) - include(joinpath("composite", "addition.jl")) - include(joinpath("composite", "compose.jl")) - # include(joinpath("composite", "gradient.jl")) - # include(joinpath("composite", "integrate.jl")) - - # Gaussian Process Probabilistic Programme object which implements the AbstractGPs API. 
+ # A couple of AbstractVector subtypes useful for expressing structure in inputs + # regularly found in GPPPs. + include("input_collection_types.jl") + + # AbstractGP subtypes and associated utility. + include(joinpath("gp", "util.jl")) + include(joinpath("gp", "atomic_gp.jl")) + include(joinpath("gp", "derived_gp.jl")) + include(joinpath("gp", "sparse_finite_gp.jl")) + + # Affine transformation library. Each file contains one / a couple of closely-related + # affine transformations. Consequently, the code in each file can be understood + # independently of the code in each other file. + include(joinpath("affine_transformations", "cross.jl")) + include(joinpath("affine_transformations", "addition.jl")) + include(joinpath("affine_transformations", "compose.jl")) + include(joinpath("affine_transformations", "product.jl")) + + # AbstractGP subtype which groups together other AbstractGP subtypes. include("gaussian_process_probabilistic_programme.jl") - # Sparse GP hack to make pseudo-point approximations play nicely with Turing.jl. - include("sparse_finite_gp.jl") - - include("deprecate.jl") - - export wrap, BlockData, GPC, GPPPInput, @gppp + export atomic, BlockData, GPC, GPPPInput, @gppp export ∘, select, stretch, periodic, shift - export cov_diag, mean_and_cov_diag + export SparseFiniteGP + end # module diff --git a/src/composite/addition.jl b/src/affine_transformations/addition.jl similarity index 93% rename from src/composite/addition.jl rename to src/affine_transformations/addition.jl index bb06b844..dab65e55 100644 --- a/src/composite/addition.jl +++ b/src/affine_transformations/addition.jl @@ -7,7 +7,7 @@ Produces an AbstractGP `f` satisfying `f(x) = fa(x) + fb(x)`. 
""" function +(fa::AbstractGP, fb::AbstractGP) @assert fa.gpc === fb.gpc - return CompositeGP((+, fa, fb), fa.gpc) + return DerivedGP((+, fa, fb), fa.gpc) end -(fa::AbstractGP, fb::AbstractGP) = fa + (-fb) @@ -52,10 +52,10 @@ end # -# Add a constant or known function to a GP -- just shifts the mean +# Add a constant or known function to an AbstractGP -- just shifts the mean # -+(b, f::AbstractGP) = CompositeGP((+, b, f), f.gpc) ++(b, f::AbstractGP) = DerivedGP((+, b, f), f.gpc) +(f::AbstractGP, b) = b + f -(b::Real, f::AbstractGP) = b + (-f) -(f::AbstractGP, b::Real) = f + (-b) diff --git a/src/composite/compose.jl b/src/affine_transformations/compose.jl similarity index 78% rename from src/composite/compose.jl rename to src/affine_transformations/compose.jl index 527c5f33..229c468c 100644 --- a/src/composite/compose.jl +++ b/src/affine_transformations/compose.jl @@ -1,11 +1,11 @@ import Base: ∘ """ - ∘(f::GP, g) + ∘(f::AbstractGP, g) -Constructs the GP f′ given by f′(x) := f(g(x)) +Constructs the DerivedGP f′ given by f′(x) := f(g(x)) """ -∘(f::AbstractGP, g) = CompositeGP((∘, f, g), f.gpc) +∘(f::AbstractGP, g) = DerivedGP((∘, f, g), f.gpc) const comp_args = Tuple{typeof(∘), AbstractGP, Any} @@ -60,7 +60,11 @@ stretch(f::AbstractGP, A::AbstractMatrix{<:Real}) = f ∘ Stretch(A) """ Select{Tidx} -Use inside an input transformation meanfunction / crosskernel to improve peformance. +Construct the process given by +```julia +f_new(x) = f(x[idx]) +``` +In effect projects `f` into a higher-dimensional space. 
""" struct Select{Tidx} idx::Tidx @@ -69,26 +73,6 @@ end broadcasted(f::Select, x::ColVecs) = ColVecs(x.X[f.idx, :]) broadcasted(f::Select{<:Integer}, x::ColVecs) = x.X[f.idx, :] -function broadcasted(f::Select, x::AbstractVector{<:CartesianIndex}) - out = Matrix{Int}(undef, length(f.idx), length(x)) - for i in f.idx, n in eachindex(x) - out[i, n] = x[n][i] - end - return ColVecs(out) -end - -function ChainRulesCore.rrule(::typeof(broadcasted), f::Select, x::AV{<:CartesianIndex}) - return broadcasted(f, x), Δ->(NoTangent(), NoTangent(), ZeroTangent()) -end - -function broadcasted(f::Select{<:Integer}, x::AV{<:CartesianIndex}) - out = Matrix{Int}(undef, length(x)) - for n in eachindex(x) - out[n] = x[n][f.idx] - end - return out -end - """ select(f::AbstractGP, idx) @@ -135,6 +119,6 @@ broadcasted(f::Shift, x::ColVecs) = ColVecs(x.X .- f.a) shift(f::AbstractGP, a::Real) shift(f::AbstractGP, a::AbstractVector{<:Real}) -Returns the GP `g` given by `g(x) = f(x - a)` +Returns the DerivedGP `g` given by `g(x) = f(x - a)` """ shift(f::AbstractGP, a) = f ∘ Shift(a) diff --git a/src/composite/cross.jl b/src/affine_transformations/cross.jl similarity index 53% rename from src/composite/cross.jl rename to src/affine_transformations/cross.jl index 3a121c57..a3a0a875 100644 --- a/src/composite/cross.jl +++ b/src/affine_transformations/cross.jl @@ -1,11 +1,42 @@ +# +# Functionality for constructing block matrices. +# + +# Mangle name to avoid type piracy. +_mortar(blocks::AbstractArray) = BlockArrays.mortar(blocks) + +function ChainRulesCore.rrule(::typeof(_mortar), _blocks::AbstractArray) + mortar_pullback(Δ::Tangent) = (NoTangent(), Δ.blocks) + return _mortar(_blocks), mortar_pullback +end + +# A hook to which I can attach an rrule without commiting type-piracy against BlockArrays. 
+_collect(X::BlockArray) = Array(X) + +function ChainRulesCore.rrule(::typeof(_collect), X::BlockArray) + function Array_pullback(Δ::Array) + ΔX = Tangent{Any}(blocks=BlockArray(Δ, axes(X)).blocks, axes=NoTangent()) + return (NoTangent(), ΔX) + end + return Array(X), Array_pullback +end + + + +# +# Functionality for grouping together collections of GPs into a single GP. +# + """ - cross(fs::AbstractVector{<:GP}) + cross(fs::AbstractVector{<:AbstractGP}) -Creates a multi-output GP from an `AbstractVector` of `GP`s. +Creates a GPPP-like object from a collection of GPs. +This is largely an implementation detail that is useful for GPPPs. +Not included in the user-facing API. """ function LinearAlgebra.cross(fs::AbstractVector{<:AbstractGP}) consistency_checks(fs) - return CompositeGP((cross, fs), first(fs).gpc) + return DerivedGP((cross, fs), first(fs).gpc) end function consistency_checks(fs) @@ -19,36 +50,36 @@ const cross_args{T<:AbstractVector{<:AbstractGP}} = Tuple{typeof(cross), T} function mean((_, fs)::cross_args, x::BlockData) blks = map((f, blk)->mean(f, blk), fs, blocks(x)) - return _collect(mortar(blks)) + return _collect(_mortar(blks)) end function cov((_, fs)::cross_args, x::BlockData) Cs = reshape(map((f, blk)->cov(f, (cross, fs), blk, x), fs, blocks(x)), :, 1) - return _collect(mortar(reshape(Cs, :, 1))) + return _collect(_mortar(reshape(Cs, :, 1))) end function var((_, fs)::cross_args, x::BlockData) cs = map(var, fs, blocks(x)) - return _collect(mortar(cs)) + return _collect(_mortar(cs)) end function cov((_, fs)::cross_args, x::BlockData, x′::BlockData) Cs = reshape(map((f, blk)->cov(f, (cross, fs), blk, x′), fs, blocks(x)), :, 1) - return _collect(mortar(reshape(Cs, :, 1))) + return _collect(_mortar(reshape(Cs, :, 1))) end function var((_, fs)::cross_args, x::BlockData, x′::BlockData) cs = map(var, fs, blocks(x), blocks(x′)) - return _collect(mortar(cs)) + return _collect(_mortar(cs)) end function cov((_, fs)::cross_args, f′::AbstractGP, x::BlockData, 
x′::AV) Cs = reshape(map((f, x)->cov(f, f′, x, x′), fs, blocks(x)), :, 1) - return _collect(mortar(Cs)) + return _collect(_mortar(Cs)) end function cov(f::AbstractGP, (_, fs)::cross_args, x::AV, x′::BlockData) Cs = reshape(map((f′, x′)->cov(f, f′, x, x′), fs, blocks(x′)), 1, :) - return _collect(mortar(Cs)) + return _collect(_mortar(Cs)) end function var(args::cross_args, f′::AbstractGP, x::BlockData, x′::AV) @@ -57,20 +88,3 @@ end function var(f::AbstractGP, args::cross_args, x::AV, x′::BlockData) return diag(cov(f, args, x, x′)) end - - - -# -# Build a single FiniteGP from a collection of FiniteGPs. -# - -function finites_to_block(fs::AV{<:FiniteGP}) - return FiniteGP( - cross(map(f->f.f, fs)), - BlockData(map(f->f.x, fs)), - make_block_noise(map(f->f.Σy, fs)), - ) -end - -make_block_noise(Σys::Vector{<:Diagonal}) = Diagonal(Vector(mortar(diag.(Σys)))) -make_block_noise(Σys::Vector) = block_diagonal(Σys) diff --git a/src/composite/product.jl b/src/affine_transformations/product.jl similarity index 95% rename from src/composite/product.jl rename to src/affine_transformations/product.jl index 2121c58e..6356b2a7 100644 --- a/src/composite/product.jl +++ b/src/affine_transformations/product.jl @@ -8,8 +8,8 @@ function `f`. If `f isa Real`, then `h(x) = f * g(x)`. 
""" -*(f, g::AbstractGP) = CompositeGP((*, f, g), g.gpc) -*(f::AbstractGP, g) = CompositeGP((*, g, f), f.gpc) +*(f, g::AbstractGP) = DerivedGP((*, f, g), g.gpc) +*(f::AbstractGP, g) = DerivedGP((*, g, f), f.gpc) *(f::AbstractGP, g::AbstractGP) = throw(ArgumentError("Cannot multiply two GPs together.")) const prod_args{Tf} = Tuple{typeof(*), Tf, <:AbstractGP} diff --git a/src/composite/gradient.jl b/src/composite/gradient.jl deleted file mode 100644 index d5d241f3..00000000 --- a/src/composite/gradient.jl +++ /dev/null @@ -1,9 +0,0 @@ -""" - -""" -∇(f::GP) = GP(∇, f) - -μ_p′(::typeof(∇), f) = DerivativeMean(mean(f)) -k_p′(::typeof(∇), f) = DerivativeKernel(kernel(f)) -k_p′p(::typeof(∇), f, fp::GP) = DerivativeLhsCross(kernel(f, fp)) -k_pp′(fp::GP, ::typeof(∇), f) = DerivativeRhsCross(kernel(fp, f)) diff --git a/src/composite/integrate.jl b/src/composite/integrate.jl deleted file mode 100644 index cc4c36ab..00000000 --- a/src/composite/integrate.jl +++ /dev/null @@ -1,21 +0,0 @@ -export ∫, StandardNormal - -struct StandardNormal end - -""" - ∫(::StandardNormal, f::GP{<:SEKernel}) - -Return the scalar-valued process which results from computing the expectation of `f`, whose -mean and kernel are the zero-function and `SEKernel` resp., under a standard normal distribution. -""" -∫(::StandardNormal, f::GP{<:μFun, <:SEKernel}) = GP(∫, StandardNormal(), f) -μ_p′(::typeof(∫), ::StandardNormal, f::GP{<:μFun, <:SEKernel}) = ZeroMean() -k_p′(::typeof(∫), ::StandardNormal, f::GP{<:μFun, <:SEKernel}) = Finite(Constant(1 / sqrt(3)), [1]) -k_pp′(f_p::GP, ::typeof(∫), ::StandardNormal, f::GP) = - f_p === f ? - RhsFinite((x, ::Any)->exp(-0.25 * x^2) / sqrt(2), [1]) : - throw(error("Don't know how to compute xcovs for general graphs for integration.")) -k_p′p(f_p::GP, ::typeof(∫), ::StandardNormal, f::GP) = - f_p === f ? 
- LhsFinite((::Any, x′)->exp(-0.25 * x′^2) / sqrt(2), [1]) : - throw(error("Don't know how to compute xcovs for general graphs for integration.")) diff --git a/src/deprecate.jl b/src/deprecate.jl deleted file mode 100644 index a17dee00..00000000 --- a/src/deprecate.jl +++ /dev/null @@ -1,7 +0,0 @@ -@deprecate GP(m, k, gpc) Stheno.wrap(GP(m, k), gpc) -@deprecate GP(k, gpc) Stheno.wrap(GP(k), gpc) - -using KernelFunctions: NeuralKernelNetwork -export NeuralKernelNetwork - -@deprecate approx_posterior posterior diff --git a/src/gaussian_process_probabilistic_programme.jl b/src/gaussian_process_probabilistic_programme.jl index 585d17f3..a64a24fd 100644 --- a/src/gaussian_process_probabilistic_programme.jl +++ b/src/gaussian_process_probabilistic_programme.jl @@ -1,3 +1,8 @@ +# +# GPPP implementation. +# + + """ GaussianProcessProbabilisticProgramme(fs, gpc) @@ -12,40 +17,6 @@ end const GPPP = GaussianProcessProbabilisticProgramme -""" - GPPPInput(p, x::AbstractVector) - -An collection of inputs for a GPPP. -`p` indicates which process the vector `x` should be extracted from. -The required type of `p` is determined by the type of the keys in the `GPPP` indexed. - -```jldoctest -julia> x = [1.0, 1.5, 0.3]; - -julia> v = GPPPInput(:a, x) -3-element GPPPInput{Symbol, Float64, Vector{Float64}}: - (:a, 1.0) - (:a, 1.5) - (:a, 0.3) - -julia> v isa AbstractVector{Tuple{Symbol, Float64}} -true - -julia> v == map(x_ -> (:a, x_), x) -true -``` -""" -struct GPPPInput{Tp, T, Tx<:AbstractVector{T}} <: AbstractVector{Tuple{Tp, T}} - p::Tp - x::Tx -end - -Base.size(x::GPPPInput) = (length(x.x), ) - -Base.getindex(x::GPPPInput, idx) = map(x_ -> (x.p, x_), x.x[idx]) - - - # # Implementation of the AbstractGPs API for the inputs types which are valid for a GPPP. # See `AbstractGPs.jl` for details. 
@@ -71,39 +42,39 @@ function extract_components(f::GPPP, x::AbstractVector{<:Tuple{T, V}} where {T, return extract_components(f, BlockData(blocks)) end -function mean(f::GPPP, x::AbstractVector) +function AbstractGPs.mean(f::GPPP, x::AbstractVector) fs, vs = extract_components(f, x) return mean(fs, vs) end -function cov(f::GPPP, x::AbstractVector) +function AbstractGPs.cov(f::GPPP, x::AbstractVector) fs, vs = extract_components(f, x) return cov(fs, vs) end -function var(f::GPPP, x::AbstractVector) +function AbstractGPs.var(f::GPPP, x::AbstractVector) fs, vs = extract_components(f, x) return var(fs, vs) end -function cov(f::GPPP, x::AbstractVector, x′::AbstractVector) +function AbstractGPs.cov(f::GPPP, x::AbstractVector, x′::AbstractVector) fs_x, vs_x = extract_components(f, x) fs_x′, vs_x′ = extract_components(f, x′) return cov(fs_x, fs_x′, vs_x, vs_x′) end -function var(f::GPPP, x::AbstractVector, x′::AbstractVector) +function AbstractGPs.var(f::GPPP, x::AbstractVector, x′::AbstractVector) fs_x, vs_x = extract_components(f, x) fs_x′, vs_x′ = extract_components(f, x′) return var(fs_x, fs_x′, vs_x, vs_x′) end -function mean_and_cov(f::GPPP, x::AbstractVector) +function AbstractGPs.mean_and_cov(f::GPPP, x::AbstractVector) fs, vs = extract_components(f, x) return mean_and_cov(fs, vs) end -function mean_and_var(f::GPPP, x::AbstractVector) +function AbstractGPs.mean_and_var(f::GPPP, x::AbstractVector) fs, vs = extract_components(f, x) return mean_and_var(fs, vs) end @@ -221,7 +192,7 @@ macro gppp(let_block::Expr) wrapped_model = Expr(:block, :($gpc_sym = Stheno.GPC()), postwalk( - x->@capture(x, GP(xs__)) ? :(Stheno.wrap(GP($(xs...)), $gpc_sym)) : x, + x->@capture(x, GP(xs__)) ? 
:(Stheno.atomic(GP($(xs...)), $gpc_sym)) : x, model_expr, ).args..., :(Stheno.GPPP($var_mapping, $gpc_sym)), diff --git a/src/gp/atomic_gp.jl b/src/gp/atomic_gp.jl new file mode 100644 index 00000000..a2901ae1 --- /dev/null +++ b/src/gp/atomic_gp.jl @@ -0,0 +1,37 @@ +""" + AtomicGP{Tgp<:AbstractGP} <: SthenoAbstractGP + +A thin wrapper around an AbstractGP that does some book-keeping. + +```julia +f = atomic(GP(SEKernel()), GPC()) +``` +builds a `AtomicGP` that `Stheno` knows how to work with. +""" +struct AtomicGP{Tgp<:AbstractGP} <: SthenoAbstractGP + gp::Tgp + n::Int + gpc::GPC + function AtomicGP{Tgp}(gp::Tgp, gpc::GPC) where {Tgp<:AbstractGP} + wgp = new{Tgp}(gp, next_index(gpc), gpc) + gpc.n += 1 + return wgp + end +end + +atomic(gp::Tgp, gpc::GPC) where {Tgp<:AbstractGP} = AtomicGP{Tgp}(gp, gpc) + +AbstractGPs.mean(f::AtomicGP, x::AbstractVector) = mean(f.gp, x) + +AbstractGPs.cov(f::AtomicGP, x::AbstractVector) = cov(f.gp, x) +AbstractGPs.var(f::AtomicGP, x::AbstractVector) = var(f.gp, x) + +AbstractGPs.cov(f::AtomicGP, x::AbstractVector, x′::AbstractVector) = cov(f.gp, x, x′) +AbstractGPs.var(f::AtomicGP, x::AbstractVector, x′::AbstractVector) = var(f.gp, x, x′) + +function AbstractGPs.cov(f::AtomicGP, f′::AtomicGP, x::AbstractVector, x′::AbstractVector) + return f === f′ ? cov(f, x, x′) : zeros(length(x), length(x′)) +end +function AbstractGPs.var(f::AtomicGP, f′::AtomicGP, x::AbstractVector, x′::AbstractVector) + return f === f′ ? var(f, x, x′) : zeros(length(x)) +end diff --git a/src/composite/composite_gp.jl b/src/gp/derived_gp.jl similarity index 54% rename from src/composite/composite_gp.jl rename to src/gp/derived_gp.jl index 80ab88bd..c44786f3 100644 --- a/src/composite/composite_gp.jl +++ b/src/gp/derived_gp.jl @@ -1,36 +1,36 @@ """ - CompositeGP{Targs} <: AbstractGP + DerivedGP{Targs} <: AbstractGP A GP derived from other GPs via an affine transformation. Specification given by `args`. You should generally _not_ construct this object manually. 
""" -struct CompositeGP{Targs} <: SthenoAbstractGP +struct DerivedGP{Targs} <: SthenoAbstractGP args::Targs n::Int gpc::GPC - function CompositeGP{Targs}(args::Targs, gpc::GPC) where {Targs} + function DerivedGP{Targs}(args::Targs, gpc::GPC) where {Targs} gp = new{Targs}(args, next_index(gpc), gpc) gpc.n += 1 return gp end end -CompositeGP(args::Targs, gpc::GPC) where {Targs} = CompositeGP{Targs}(args, gpc) +DerivedGP(args::Targs, gpc::GPC) where {Targs} = DerivedGP{Targs}(args, gpc) -mean(f::CompositeGP, x::AbstractVector) = mean(f.args, x) +AbstractGPs.mean(f::DerivedGP, x::AbstractVector) = mean(f.args, x) -cov(f::CompositeGP, x::AbstractVector) = cov(f.args, x) -var(f::CompositeGP, x::AbstractVector) = var(f.args, x) +AbstractGPs.cov(f::DerivedGP, x::AbstractVector) = cov(f.args, x) +AbstractGPs.var(f::DerivedGP, x::AbstractVector) = var(f.args, x) -cov(f::CompositeGP, x::AbstractVector, x′::AbstractVector) = cov(f.args, x, x′) -var(f::CompositeGP, x::AbstractVector, x′::AbstractVector) = var(f.args, x, x′) +AbstractGPs.cov(f::DerivedGP, x::AbstractVector, x′::AbstractVector) = cov(f.args, x, x′) +AbstractGPs.var(f::DerivedGP, x::AbstractVector, x′::AbstractVector) = var(f.args, x, x′) -function cov( +function AbstractGPs.cov( f::SthenoAbstractGP, f′::SthenoAbstractGP, x::AbstractVector, x′::AbstractVector, ) @assert f.gpc === f′.gpc if f.n === f′.n return cov(f.args, x, x′) - elseif f isa WrappedGP && f.n > f′.n || f′ isa WrappedGP && f′.n > f.n + elseif f isa AtomicGP && f.n > f′.n || f′ isa AtomicGP && f′.n > f.n return zeros(length(x), length(x′)) elseif f.n >= f′.n return cov(f.args, f′, x, x′) @@ -39,13 +39,13 @@ function cov( end end -function var( +function AbstractGPs.var( f::SthenoAbstractGP, f′::SthenoAbstractGP, x::AbstractVector, x′::AbstractVector, ) @assert f.gpc === f′.gpc if f.n === f′.n return var(f.args, x, x′) - elseif f isa WrappedGP && f.n > f′.n || f′ isa WrappedGP && f′.n > f.n + elseif f isa AtomicGP && f.n > f′.n || f′ isa AtomicGP && 
f′.n > f.n return zeros(length(x)) elseif f.n >= f′.n return var(f.args, f′, x, x′) diff --git a/src/gp/gp.jl b/src/gp/gp.jl deleted file mode 100644 index 847221da..00000000 --- a/src/gp/gp.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" - WrappedGP{Tgp<:AbstractGP} <: SthenoAbstractGP - -A thin wrapper around an AbstractGP that does some book-keeping. - -```julia -f = wrap(GP(SEKernel()), GPC()) -``` -builds a `WrappedGP` that `Stheno` knows how to work with. -""" -struct WrappedGP{Tgp<:AbstractGP} <: SthenoAbstractGP - gp::Tgp - n::Int - gpc::GPC - function WrappedGP{Tgp}(gp::Tgp, gpc::GPC) where {Tgp<:AbstractGP} - wgp = new{Tgp}(gp, next_index(gpc), gpc) - gpc.n += 1 - return wgp - end -end - -wrap(gp::Tgp, gpc::GPC) where {Tgp<:AbstractGP} = WrappedGP{Tgp}(gp, gpc) - -mean(f::WrappedGP, x::AbstractVector) = mean(f.gp, x) - -cov(f::WrappedGP, x::AbstractVector) = cov(f.gp, x) -var(f::WrappedGP, x::AbstractVector) = var(f.gp, x) - -cov(f::WrappedGP, x::AbstractVector, x′::AbstractVector) = cov(f.gp, x, x′) -var(f::WrappedGP, x::AbstractVector, x′::AbstractVector) = var(f.gp, x, x′) - -function cov(f::WrappedGP, f′::WrappedGP, x::AbstractVector, x′::AbstractVector) - return f === f′ ? cov(f, x, x′) : zeros(length(x), length(x′)) -end -function var(f::WrappedGP, f′::WrappedGP, x::AbstractVector, x′::AbstractVector) - return f === f′ ? var(f, x, x′) : zeros(length(x)) -end diff --git a/src/sparse_finite_gp.jl b/src/gp/sparse_finite_gp.jl similarity index 57% rename from src/sparse_finite_gp.jl rename to src/gp/sparse_finite_gp.jl index d72cbdfb..9c416df4 100644 --- a/src/sparse_finite_gp.jl +++ b/src/gp/sparse_finite_gp.jl @@ -1,9 +1,3 @@ -import Base: rand, length -import AbstractGPs: logpdf, AbstractMvNormal - -export elbo, dtc -export SparseFiniteGP - """ SparseFiniteGP{T1<:FiniteGP, T2<:FiniteGP} @@ -19,7 +13,7 @@ processes". In: Proceedings of the Twelfth International Conference on Artificia Intelligence and Statistics. 2009. 
```jldoctest -julia> f = wrap(GP(Matern32Kernel()), GPC()); +julia> f = atomic(GP(Matern32Kernel()), GPC()); julia> fobs = f(rand(100)); @@ -33,36 +27,36 @@ julia> logpdf(fxu, y) < logpdf(fobs, y) true ``` """ -struct SparseFiniteGP{T1<:FiniteGP, T2<:FiniteGP} <: AbstractMvNormal +struct SparseFiniteGP{T1<:FiniteGP, T2<:FiniteGP} <: AbstractGPs.AbstractMvNormal fobs::T1 finducing::T2 end Base.length(f::SparseFiniteGP) = length(f.fobs) -mean(f::SparseFiniteGP) = mean(f.fobs) +AbstractGPs.mean(f::SparseFiniteGP) = mean(f.fobs) const __covariance_error = "The covariance matrix of a sparse GP can often be dense and " * "can cause the computer to run out of memory. If you are sure you have enough " * "memory, you can use `cov(f.fobs)`." -cov(f::SparseFiniteGP) = error(__covariance_error) +AbstractGPs.cov(f::SparseFiniteGP) = error(__covariance_error) -marginals(f::SparseFiniteGP) = marginals(f.fobs) +AbstractGPs.marginals(f::SparseFiniteGP) = marginals(f.fobs) -rand(rng::AbstractRNG, f::SparseFiniteGP, N::Int) = rand(rng, f.fobs, N) -rand(f::SparseFiniteGP, N::Int) = rand(Random.GLOBAL_RNG, f, N) -rand(rng::AbstractRNG, f::SparseFiniteGP) = vec(rand(rng, f, 1)) -rand(f::SparseFiniteGP) = vec(rand(f, 1)) +AbstractGPs.rand(rng::AbstractRNG, f::SparseFiniteGP, N::Int) = rand(rng, f.fobs, N) +AbstractGPs.rand(f::SparseFiniteGP, N::Int) = rand(Random.GLOBAL_RNG, f, N) +AbstractGPs.rand(rng::AbstractRNG, f::SparseFiniteGP) = vec(rand(rng, f, 1)) +AbstractGPs.rand(f::SparseFiniteGP) = vec(rand(f, 1)) -elbo(f::SparseFiniteGP, y::AV{<:Real}) = elbo(VFE(f.finducing), f.fobs, y) +AbstractGPs.elbo(f::SparseFiniteGP, y::AV{<:Real}) = elbo(VFE(f.finducing), f.fobs, y) -logpdf(f::SparseFiniteGP, y::AV{<:Real}) = elbo(VFE(f.finducing), f.fobs, y) +AbstractGPs.logpdf(f::SparseFiniteGP, y::AV{<:Real}) = elbo(VFE(f.finducing), f.fobs, y) -function logpdf(f::SparseFiniteGP, Y::AbstractMatrix{<:Real}) +function AbstractGPs.logpdf(f::SparseFiniteGP, Y::AbstractMatrix{<:Real}) return map(y -> 
logpdf(f, y), eachcol(Y)) end -function posterior(f::SparseFiniteGP, y::AbstractVector{<:Real}) +function AbstractGPs.posterior(f::SparseFiniteGP, y::AbstractVector{<:Real}) return posterior(AbstractGPs.VFE(f.finducing), f.fobs, y) end diff --git a/src/abstract_gp.jl b/src/gp/util.jl similarity index 89% rename from src/abstract_gp.jl rename to src/gp/util.jl index 4c3109b3..c8df0a62 100644 --- a/src/abstract_gp.jl +++ b/src/gp/util.jl @@ -23,6 +23,3 @@ end ChainRulesCore.@non_differentiable GPC() next_index(gpc::GPC) = gpc.n + 1 - -# TYPE PIRACY! -LinearAlgebra.cholesky(D::Diagonal{<:Real, <:Fill}) = AbstractGPs._cholesky(D) diff --git a/src/input_collection_types.jl b/src/input_collection_types.jl new file mode 100644 index 00000000..bb7442ff --- /dev/null +++ b/src/input_collection_types.jl @@ -0,0 +1,95 @@ +""" + GPPPInput(p, x::AbstractVector) + +An collection of inputs for a GPPP. +`p` indicates which process the vector `x` should be extracted from. +The required type of `p` is determined by the type of the keys in the `GPPP` indexed. + +```jldoctest +julia> x = [1.0, 1.5, 0.3]; + +julia> v = GPPPInput(:a, x) +3-element GPPPInput{Symbol, Float64, Vector{Float64}}: + (:a, 1.0) + (:a, 1.5) + (:a, 0.3) + +julia> v isa AbstractVector{Tuple{Symbol, Float64}} +true + +julia> v == map(x_ -> (:a, x_), x) +true +``` +""" +struct GPPPInput{Tp, T, Tx<:AbstractVector{T}} <: AbstractVector{Tuple{Tp, T}} + p::Tp + x::Tx +end + +Base.size(x::GPPPInput) = (length(x.x), ) + +Base.getindex(x::GPPPInput, idx::Integer) = only(x[idx:idx]) + +Base.getindex(x::GPPPInput, idx) = map(x_ -> (x.p, x_), x.x[idx]) + + + +""" + BlockData{T, TV<:AbstractVector{T}, TX<:AbstractVector{TV}} <: AbstractVector{T} + +A strictly ordered collection of `AbstractVector`s, representing a ragged array of data. + +Very useful when working with `GPPP`s. 
For example +```julia +f = @gppp let + f1 = GP(SEKernel()) + f2 = GP(Matern52Kernel()) + f3 = f1 + f2 +end + +# Specify a `BlockData` set that can be used to index into +# the `f2` and `f3` processes in `f`. +x = BlockData( + GPPPInput(:f2, randn(4)), + GPPPInput(:f3, randn(3)), +) + +# Index into `f` at the input. +f(x) +``` +""" +struct BlockData{T, V<:AbstractVector{<:T}} <: AbstractVector{T} + X::Vector{V} +end + +BlockData(X::Vector{AbstractVector}) = BlockData{Any, AbstractVector}(X) + +BlockData(xs::AbstractVector...) = BlockData([xs...]) + +Base.size(D::BlockData) = (sum(length, D.X),) + +function Base.getindex(D::BlockData, n::Int) + b = 1 + while n > length(D.X[b]) + n -= length(D.X[b]) + b += 1 + end + return D.X[b][n] +end + +Base.:(==)(D1::BlockData, D2::BlockData) = D1.X == D2.X + +blocks(X::BlockData) = X.X + +Base.view(D::BlockData, b::Int, n) = view(D.X[b], n) + +Base.eltype(D::BlockData{T}) where {T} = T + +function Base.eachindex(D::BlockData) + lengths = map(length, blocks(D)) + return BlockArray(1:sum(lengths), lengths) +end + +Base.vcat(x::GPPPInput...) = BlockData(AbstractVector[x...]) + +Base.vcat(x::GPPPInput{Symbol, T}...) where {T} = BlockData([x...]) diff --git a/src/util/abstract_data_set.jl b/src/util/abstract_data_set.jl deleted file mode 100644 index 94b21109..00000000 --- a/src/util/abstract_data_set.jl +++ /dev/null @@ -1,57 +0,0 @@ -import Base: size, eachindex, getindex, view, ==, eltype - -""" - BlockData{T, TV<:AbstractVector{T}, TX<:AbstractVector{TV}} <: AbstractVector{T} - -A strictly ordered collection of `AbstractVector`s, representing a ragged array of data. - -Very useful when working with `GPPP`s. For example -```julia -f = @gppp let - f1 = GP(SEKernel()) - f2 = GP(Matern52Kernel()) - f3 = f1 + f2 -end - -# Specify a `BlockData` set that can be used to index into -# the `f2` and `f3` processes in `f`. -x = BlockData( - GPPPInput(:f2, randn(4)), - GPPPINput(:f3, randn(3)), -) - -# Index into `f` at the input. 
-f(x) -``` -""" -struct BlockData{T, V<:AbstractVector{<:T}} <: AbstractVector{T} - X::Vector{V} -end - -BlockData(X::Vector{AbstractVector}) = BlockData{Any, AbstractVector}(X) - -BlockData(xs::AbstractVector...) = BlockData([xs...]) - -==(D1::BlockData, D2::BlockData) = D1.X == D2.X - -size(D::BlockData) = (sum(length, D.X),) - -blocks(X::BlockData) = X.X - -function getindex(D::BlockData, n::Int) - b = 1 - while n > length(D.X[b]) - n -= length(D.X[b]) - b += 1 - end - return D.X[b][n] -end - -view(D::BlockData, b::Int, n) = view(D.X[b], n) - -eltype(D::BlockData{T}) where {T} = T - -function eachindex(D::BlockData) - lengths = map(length, blocks(D)) - return BlockArray(1:sum(lengths), lengths) -end diff --git a/src/util/block_arrays.jl b/src/util/block_arrays.jl deleted file mode 100644 index 41da5146..00000000 --- a/src/util/block_arrays.jl +++ /dev/null @@ -1,25 +0,0 @@ -# This file contains a number of additions to BlockArrays.jl. These are completely -# independent of Stheno.jl, and will (hopefully) move over to BlockArrays.jl at some point. - -function ChainRulesCore.rrule(::typeof(BlockArrays.mortar), _blocks::AbstractArray) - y = BlockArrays.mortar(_blocks) - Ty = typeof(y) - function mortar_pullback(Δ::Tangent) - return (NoTangent(), Δ.blocks) - end - function mortar_pullback(Δ::BlockArray) - return mortar_pullback(Tangent{Ty}(; blocks = Δ.blocks, axes=NoTangent())) - end - return y, mortar_pullback -end - -# A hook to which I can attach an rrule without commiting type-piracy against BlockArrays. 
-_collect(X::BlockArray) = Array(X) - -function ChainRulesCore.rrule(::typeof(_collect), X::BlockArray) - function Array_pullback(Δ::Array) - ΔX = Tangent{Any}(blocks=BlockArray(Δ, axes(X)).blocks, axes=NoTangent()) - return (NoTangent(), ΔX) - end - return Array(X), Array_pullback -end diff --git a/src/util/covariance_matrices.jl b/src/util/covariance_matrices.jl deleted file mode 100644 index 273af77e..00000000 --- a/src/util/covariance_matrices.jl +++ /dev/null @@ -1,23 +0,0 @@ -# Various specialised operations using the Cholesky factorisation. - -Xt_invA_X(A::Cholesky, x::AbstractVector) = sum(abs2, A.U' \ x) -function Xt_invA_X(A::Cholesky, X::AbstractVecOrMat) - V = A.U' \ X - return Symmetric(V'V) -end - -Xt_invA_Y(X::AbstractVecOrMat, A::Cholesky, Y::AbstractVecOrMat) = (A.U' \ X)' * (A.U' \ Y) - -diag_At_A(A::AbstractVecOrMat) = vec(sum(abs2.(A); dims=1)) - -function diag_At_B(A::AbstractVecOrMat, B::AbstractVecOrMat) - @assert size(A) == size(B) - return vec(sum(A .* B; dims=1)) -end - -diag_Xt_invA_X(A::Cholesky, X::AbstractVecOrMat) = diag_At_A(A.U' \ X) - -function diag_Xt_invA_Y(X::AbstractMatrix, A::Cholesky, Y::AbstractMatrix) - @assert size(X) == size(Y) - return diag_At_B(A.U' \ X, A.U' \ Y) -end diff --git a/src/util/proper_type_piracy.jl b/src/util/proper_type_piracy.jl deleted file mode 100644 index 85a53641..00000000 --- a/src/util/proper_type_piracy.jl +++ /dev/null @@ -1,3 +0,0 @@ -# This file contains some real first-rate type piracy. Sorry in advance. 
- -LinearAlgebra.UpperTriangular(D::Diagonal) = D diff --git a/src/util/zygote_rules.jl b/src/util/zygote_rules.jl deleted file mode 100644 index 85d50705..00000000 --- a/src/util/zygote_rules.jl +++ /dev/null @@ -1,28 +0,0 @@ -using Zygote -import Zygote: accum - -function accum(D::Diagonal{T}, B::AbstractMatrix) where {T} - A = Matrix{Union{T, Nothing}}(undef, size(D)) - A[diagind(A)] .= D.diag - return accum(A, B) -end -accum(A::AbstractMatrix, D::Diagonal) = accum(D, A) -accum(A::Diagonal, B::Diagonal) = Diagonal(accum(diag(A), diag(B))) - -# -# Some very specific broadcasting hacks while Zygote has crappy broadcasting. -# - -import Base.Broadcast: broadcasted - -function ChainRulesCore.rrule(::typeof(broadcasted), ::typeof(-), x::AbstractArray) - function broadcasted_minus_pullback(Δ) - return (NoTangent(), NoTangent(), .-Δ) - end - return .-x, broadcasted_minus_pullback -end - -function ChainRulesCore.rrule(::typeof(broadcasted), ::typeof(exp), x::AbstractArray) - y = exp.(x) - return y, Δ->(NoTangent(), NoTangent(), Δ .* y) -end diff --git a/test/Project.toml b/test/Project.toml index 5e7d9e23..7d5d818d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,8 +6,10 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] Documenter = "0.25, 0.26, 0.27" FiniteDifferences = "0.12" TimerOutputs = "0.5" +Zygote = "0.6" diff --git a/test/composite/addition.jl b/test/affine_transformations/addition.jl similarity index 89% rename from test/composite/addition.jl rename to test/affine_transformations/addition.jl index b852aca7..7d09e46e 100644 --- a/test/composite/addition.jl +++ b/test/affine_transformations/addition.jl @@ -2,8 +2,8 @@ @timedtestset "Correlated GPs" begin rng, N, N′, D, gpc = MersenneTwister(123456), 5, 6, 2, GPC() X, X′ = ColVecs(randn(rng, 
D, N)), ColVecs(randn(rng, D, N′)) - f1 = wrap(GP(1, SEKernel()), gpc) - f2 = wrap(GP(2, SEKernel()), gpc) + f1 = atomic(GP(1, SEKernel()), gpc) + f2 = atomic(GP(2, SEKernel()), gpc) f3 = f1 + f2 f4 = f1 + f3 f5 = f3 + f4 @@ -32,7 +32,7 @@ @timedtestset "Verify mean / kernel numerically" begin rng, N, D = MersenneTwister(123456), 5, 6 X = ColVecs(randn(rng, D, N)) - c, f = randn(rng), wrap(GP(5, SEKernel()), GPC()) + c, f = randn(rng), atomic(GP(5, SEKernel()), GPC()) @test mean((f + c)(X)) == mean(f(X)) .+ c @test mean((f + c)(X)) == c .+ mean(f(X)) @@ -62,8 +62,8 @@ Dict(:l1=>0.5, :l2=>2.3), θ->begin gpc = GPC() - f1 = θ[:l1] * wrap(GP(sin, SEKernel()), gpc) - f2 = θ[:l2] * wrap(GP(cos, SEKernel()), gpc) + f1 = θ[:l1] * atomic(GP(sin, SEKernel()), gpc) + f2 = θ[:l2] * atomic(GP(cos, SEKernel()), gpc) f3 = f1 + f2 return f3, f3 end, @@ -77,8 +77,8 @@ Dict(:l1=>0.5, :l2=>2.3), θ->begin gpc = GPC() - f1 = θ[:l1] * wrap(GP(sin, SEKernel()), gpc) - f2 = θ[:l2] * wrap(GP(cos, SEKernel()), gpc) + f1 = θ[:l1] * atomic(GP(sin, SEKernel()), gpc) + f2 = θ[:l2] * atomic(GP(cos, SEKernel()), gpc) f3 = f1 + f2 f4 = f1 + f3 f5 = f3 + f4 diff --git a/test/composite/compose.jl b/test/affine_transformations/compose.jl similarity index 83% rename from test/composite/compose.jl rename to test/affine_transformations/compose.jl index ae5532b7..54d90a1e 100644 --- a/test/composite/compose.jl +++ b/test/affine_transformations/compose.jl @@ -2,9 +2,9 @@ @timedtestset "general" begin rng, N, N′, gpc = MersenneTwister(123456), 5, 3, GPC() x, x′ = randn(rng, N), randn(rng, N′) - f = wrap(GP(sin, SEKernel()), gpc) + f = atomic(GP(sin, SEKernel()), gpc) g = cos - h = wrap(GP(exp, ExponentialKernel()), gpc) + h = atomic(GP(exp, ExponentialKernel()), gpc) fg = f ∘ g # Check marginals statistics inductively. 
@@ -34,7 +34,7 @@ MersenneTwister(123456), Dict(:σ=>0.5), θ->begin - f = θ[:σ] * wrap(GP(sin, SEKernel()), GPC()) + f = θ[:σ] * atomic(GP(sin, SEKernel()), GPC()) return stretch(f, 0.5), f end, collect(range(-2.0, 2.0; length=N)), @@ -46,7 +46,7 @@ @timedtestset "scalar stretch" begin rng, N, λ = MersenneTwister(123456), 3, 0.51 x = randn(rng) - f = wrap(GP(1.3, SEKernel()), GPC()) + f = atomic(GP(1.3, SEKernel()), GPC()) g = stretch(f, λ) @timedtestset "scalar input" begin @@ -56,7 +56,7 @@ MersenneTwister(123456), Dict(:σ=>0.5, :l=>0.32), θ->begin - f_ = θ[:σ] * wrap(GP(sin, SEKernel()), GPC()) + f_ = θ[:σ] * atomic(GP(sin, SEKernel()), GPC()) return stretch(f_, θ[:l]), f_ end, collect(range(-2.0, 2.0; length=N)), @@ -77,7 +77,7 @@ @timedtestset "Vector stretch" begin rng, N, D = MersenneTwister(123456), 3, 7 λ = randn(rng, D) - f = wrap(GP(1.0, SEKernel()), GPC()) + f = atomic(GP(1.0, SEKernel()), GPC()) g = stretch(f, λ) X = randn(rng, D, 1) @@ -87,7 +87,7 @@ @timedtestset "Matrix stretch" begin rng, N, D = MersenneTwister(123456), 3, 7 A = randn(rng, D, D) - f = wrap(GP(1.0, SEKernel()), GPC()) + f = atomic(GP(1.0, SEKernel()), GPC()) g = stretch(f, A) X = randn(rng, D, 1) @@ -97,9 +97,18 @@ end @timedtestset "Select" begin rng, N, D = MersenneTwister(123456), 3, 6 - @testset "idx isa Integer" begin + + @timedtestset "evaluation" begin + t = Stheno.Select(D - 1) + x = ColVecs(randn(rng, D, N)) + @test t.(x) == [t(_x) for _x in x] + + t = Stheno.Select((D-1):D) + @test t.(x) == [t(_x) for _x in x] + end + @timedtestset "idx isa Integer" begin idx = 1 - f = wrap(GP(1.3, SEKernel()), GPC()) + f = atomic(GP(1.3, SEKernel()), GPC()) g = select(f, idx) X = randn(rng, D, N) @@ -108,9 +117,9 @@ @test cov(f, g, X_f, X_g) ≈ cov(f, X_f, X_f) @test cov(f, g, X_f, X_g) ≈ cov(g, X_g, X_g) end - @testset "idx isa Vector" begin + @timedtestset "idx isa Vector" begin idx = [1, 3] - f = wrap(GP(1.3, SEKernel()), GPC()) + f = atomic(GP(1.3, SEKernel()), GPC()) g = select(f, idx) 
X = randn(rng, D, N) @@ -124,7 +133,7 @@ @timedtestset "Shift{Float64}" begin rng, N, D = MersenneTwister(123456), 3, 6 a = randn(rng) - f = wrap(GP(1.3, SEKernel()), GPC()) + f = atomic(GP(1.3, SEKernel()), GPC()) g = shift(f, a) x = randn(rng, N) @@ -142,7 +151,7 @@ @timedtestset "Shift{Vector{Float64}}" begin rng, N, D = MersenneTwister(123456), 3, 6 a = randn(rng, D) - f = wrap(GP(1.3, SEKernel()), GPC()) + f = atomic(GP(1.3, SEKernel()), GPC()) g = shift(f, a) X = randn(rng, D, N) @@ -153,8 +162,12 @@ end end @timedtestset "Periodic" begin - f = wrap(GP(SEKernel()), GPC()) + f = atomic(GP(SEKernel()), GPC()) g = periodic(f, 2.0) @test cov(g([0.0]), g([1.0])) ≈ cov(g([0.0]), g([3.0])) + + t = Stheno.Periodic(2.0) + x = randn(5) + @test t.(x) == [t(_x) for _x in x] end end diff --git a/test/composite/cross.jl b/test/affine_transformations/cross.jl similarity index 66% rename from test/composite/cross.jl rename to test/affine_transformations/cross.jl index d43e2594..dbd53de6 100644 --- a/test/composite/cross.jl +++ b/test/affine_transformations/cross.jl @@ -1,9 +1,41 @@ @timedtestset "cross" begin + @timedtestset "block arrays" begin + @timedtestset "fdm stuff" begin + rng, Ps, Qs = MersenneTwister(123456), [5, 4], [3, 2, 1] + X = mortar([randn(rng, P, Q) for P in Ps, Q in Qs], Ps, Qs) + vec_X, from_vec = FiniteDifferences.to_vec(X) + @test vec_X isa Vector + @test from_vec(vec_X) == X + end + @timedtestset "Stheno._collect ∘ _mortar" begin + @timedtestset "BlockVector" begin + + # Generate some blocks. + Ps = [5, 6, 7] + x = BlockArray(randn(sum(Ps)), Ps).blocks + + # Verify the pullback. + ȳ = randn(sum(Ps)) + adjoint_test(Stheno._collect ∘ Stheno._mortar, ȳ, x) + end + @timedtestset "BlockMatrix" begin + + # Generate some blocks. + Ps = [3, 4, 5] + Qs = [6, 7, 8, 9] + X = BlockArray(randn(sum(Ps), sum(Qs)), Ps, Qs).blocks + Ȳ = randn(sum(Ps), sum(Qs)) + + # Verify pullback. 
+ adjoint_test(Stheno._collect ∘ Stheno._mortar, Ȳ, X) + end + end + end @timedtestset "Correctness tests" begin rng, P, Q, gpc = MersenneTwister(123456), 2, 3, GPC() - f1 = wrap(GP(sin, SEKernel()), gpc) - f2 = wrap(GP(cos, SEKernel()), gpc) + f1 = atomic(GP(sin, SEKernel()), gpc) + f2 = atomic(GP(cos, SEKernel()), gpc) f3 = cross([f1, f2]) f4 = cross([f1]) f5 = cross([f2]) @@ -51,8 +83,8 @@ x2, x3 = randn(rng, P), randn(2P) gpc = GPC() - f1 = wrap(GP(sin, SEKernel()), gpc) - f2 = wrap(GP(cos, SEKernel()), gpc) + f1 = atomic(GP(sin, SEKernel()), gpc) + f2 = atomic(GP(cos, SEKernel()), gpc) f3 = cross([f1, f2]) abstractgp_interface_tests(f3, f1, x0, x1, x2, x3) @@ -72,12 +104,4 @@ # BlockData([z1, z2]), # ) end - @timedtestset "finites_to_block" begin - rng, P, Q = MersenneTwister(123456), 3, 5 - xp, xq = randn(rng, P), randn(rng, Q) - gpc = GPC() - f_ = wrap(GP(sin, SEKernel()), gpc) - g_ = wrap(GP(cos, SEKernel()), gpc) - h_x = Stheno.finites_to_block([f_(xp, 1.0), g_(xq, 1.0)]) - end end diff --git a/test/composite/integrate.jl b/test/affine_transformations/integrate.jl similarity index 100% rename from test/composite/integrate.jl rename to test/affine_transformations/integrate.jl diff --git a/test/composite/product.jl b/test/affine_transformations/product.jl similarity index 92% rename from test/composite/product.jl rename to test/affine_transformations/product.jl index 4fae4b63..02139f1f 100644 --- a/test/composite/product.jl +++ b/test/affine_transformations/product.jl @@ -1,14 +1,14 @@ @timedtestset "product" begin @timedtestset "GP mul errors" begin gpc = GPC() - f1 = wrap(GP(SEKernel()), gpc) - f2 = wrap(GP(SEKernel()), gpc) + f1 = atomic(GP(SEKernel()), gpc) + f2 = atomic(GP(SEKernel()), gpc) @test_throws ArgumentError f1 * f2 end @timedtestset "multiply by constant" begin rng, N, N′, D = MersenneTwister(123456), 3, 5, 2 X, X′ = ColVecs(randn(rng, D, N)), ColVecs(randn(rng, D, N′)) - g1, c, c′ = wrap(GP(1, SEKernel()), GPC()), -4.3, 2.1 + g1, c, c′ = 
atomic(GP(1, SEKernel()), GPC()), -4.3, 2.1 g2, g2′ = c * g1, g1 * c′ g3, g3′ = c * g2, g2′ * c′ g4, g4′ = c * g3, g3′ * c′ @@ -50,7 +50,7 @@ x2, x3 = randn(rng, Q), randn(rng, P) gpc = GPC() - f1 = wrap(GP(cos, SEKernel()), gpc) + f1 = atomic(GP(cos, SEKernel()), gpc) f2 = 5 * f1 abstractgp_interface_tests(f2, f1, x0, x1, x2, x3) end @@ -59,7 +59,7 @@ MersenneTwister(123456), [2.3], θ->begin - f = wrap(GP(0.5, SEKernel()), GPC()) + f = atomic(GP(0.5, SEKernel()), GPC()) return θ[1] * f, f end, X, X′, @@ -69,7 +69,7 @@ @timedtestset "multiply by function" begin rng, N, N′, D = MersenneTwister(123456), 3, 5, 2 X, X′ = ColVecs(randn(rng, D, N)), ColVecs(randn(rng, D, N′)) - g1, f, f′ = wrap(GP(1, SEKernel()), GPC()), x->sum(sin, x), x->sum(cos, x) + g1, f, f′ = atomic(GP(1, SEKernel()), GPC()), x->sum(sin, x), x->sum(cos, x) g2, g2′ = f * g1, g1 * f′ g3, g3′ = f * g2, g2′ * f′ g4, g4′ = f * g3, g3′ * f′ @@ -117,7 +117,7 @@ x2, x3 = randn(rng, Q), randn(rng, P) gpc = GPC() - f1 = wrap(GP(cos, SEKernel()), gpc) + f1 = atomic(GP(cos, SEKernel()), gpc) f2 = sin * f1 abstractgp_interface_tests(f2, f1, x0, x1, x2, x3) end @@ -126,7 +126,7 @@ MersenneTwister(123456), [2.3, 1.3], θ->begin - f = wrap(GP(θ[2], SEKernel()), GPC()) + f = atomic(GP(θ[2], SEKernel()), GPC()) return (x->θ[1] * x) * f, f end, collect(range(-2.0, 2.0; length=N)), diff --git a/test/composite/test_util.jl b/test/affine_transformations/test_util.jl similarity index 100% rename from test/composite/test_util.jl rename to test/affine_transformations/test_util.jl diff --git a/test/gaussian_process_probabilistic_programme.jl b/test/gaussian_process_probabilistic_programme.jl index b03f5d30..4c42c633 100644 --- a/test/gaussian_process_probabilistic_programme.jl +++ b/test/gaussian_process_probabilistic_programme.jl @@ -16,8 +16,8 @@ # Build a toy collection of processes. 
gpc = GPC() - f1 = wrap(GP(sin, SEKernel()), gpc) - f2 = wrap(GP(cos, Matern52Kernel()), gpc) + f1 = atomic(GP(sin, SEKernel()), gpc) + f2 = atomic(GP(cos, Matern52Kernel()), gpc) f3 = f1 + 3 * f2 # Use them to build a programme. @@ -107,7 +107,7 @@ @timedtestset "nested gppp" begin gpc_outer = GPC() - f1_outer = Stheno.wrap(f, gpc_outer) + f1_outer = Stheno.atomic(f, gpc_outer) f2_outer = 5 * f1_outer f_outer = Stheno.GPPP((f1=f1_outer, f2=f2_outer), gpc_outer) diff --git a/test/gp/gp.jl b/test/gp/atomic_gp.jl similarity index 90% rename from test/gp/gp.jl rename to test/gp/atomic_gp.jl index 91ee06b0..38d63e64 100644 --- a/test/gp/gp.jl +++ b/test/gp/atomic_gp.jl @@ -7,7 +7,7 @@ struct ToyAbstractGP <: AbstractGP end rng, gpc, N, N′ = MersenneTwister(123456), GPC(), 5, 6 m = AbstractGPs.CustomMean(sin) k = SqExponentialKernel() - f = wrap(GP(m, k), gpc) + f = atomic(GP(m, k), gpc) x = collect(range(-1.0, 1.0; length=N)) x′ = collect(range(-1.0, 1.0; length=N′)) @@ -25,7 +25,7 @@ struct ToyAbstractGP <: AbstractGP end gpc = GPC() m1, m2 = AbstractGPs.ZeroMean(), AbstractGPs.ConstMean(5) k1, k2 = SqExponentialKernel(), SqExponentialKernel() - f1, f2 = wrap(GP(m1, k1), gpc), wrap(GP(m2, k2), gpc) + f1, f2 = atomic(GP(m1, k1), gpc), atomic(GP(m2, k2), gpc) @test mean(f1, x) == AbstractGPs._map_meanfunction(m1, x) @test mean(f2, x) == AbstractGPs._map_meanfunction(m2, x) @@ -37,6 +37,6 @@ struct ToyAbstractGP <: AbstractGP end end @timedtestset "wrapped AbstractGP" begin - wrap(ToyAbstractGP(), GPC()) + atomic(ToyAbstractGP(), GPC()) end end diff --git a/test/gp/derived_gp.jl b/test/gp/derived_gp.jl new file mode 100644 index 00000000..b6a09c75 --- /dev/null +++ b/test/gp/derived_gp.jl @@ -0,0 +1,5 @@ +@timedtestset "derived_gp" begin + +end + +# This type is tested in affine_transformations diff --git a/test/sparse_finite_gp.jl b/test/gp/sparse_finite_gp.jl similarity index 85% rename from test/sparse_finite_gp.jl rename to test/gp/sparse_finite_gp.jl index 
38f95c16..e245b883 100644 --- a/test/sparse_finite_gp.jl +++ b/test/gp/sparse_finite_gp.jl @@ -3,20 +3,21 @@ xu = 0:10 σ = 1.0 σu = 1e-3 - f = wrap(GP(Matern32Kernel()), GPC()) + f = atomic(GP(Matern32Kernel()), GPC()) covariance_error = "The covariance matrix of a sparse GP can often be dense and " * "can cause the computer to run out of memory. If you are sure you have enough " * "memory, you can use `cov(f.fobs)`." @timedtestset "SparseFiniteGP Constructors" begin - f = wrap(GP(Matern32Kernel()), GPC()) + f = atomic(GP(Matern32Kernel()), GPC()) @test SparseFiniteGP(f(x), f(xu)) == SparseFiniteGP(f(x, 1e-18), f(xu, 1e-18)) end @timedtestset "SparseFiniteGP methods" begin - f = wrap(GP(Matern32Kernel()), GPC()) + f = atomic(GP(Matern32Kernel()), GPC()) fx = f(x) fxu = SparseFiniteGP(f(x), f(xu)) + @test length(fxu) == length(x) @test mean(fxu) == mean(fx) @test marginals(fxu) == marginals(fx) @test rand(MersenneTwister(12345), fxu) == rand(MersenneTwister(12345), fx) @@ -25,7 +26,7 @@ end @timedtestset "SparseFiniteGP inference" begin - f = wrap(GP(Matern32Kernel()), GPC()) + f = atomic(GP(Matern32Kernel()), GPC()) fx = f(x, σ) fxu = SparseFiniteGP(f(x, σ), f(xu, σu)) y = rand(MersenneTwister(12345), fxu) diff --git a/test/abstract_gp.jl b/test/gp/util.jl similarity index 86% rename from test/abstract_gp.jl rename to test/gp/util.jl index a8734805..f1110156 100644 --- a/test/abstract_gp.jl +++ b/test/gp/util.jl @@ -9,7 +9,7 @@ end @testset "statistics" begin rng, N, N′ = MersenneTwister(123456), 1, 9 x, x′, Σy, Σy′ = randn(rng, N), randn(rng, N′), zeros(N, N), zeros(N′, N′) - f = wrap(GP(sin, SEKernel()), GPC()) + f = atomic(GP(sin, SEKernel()), GPC()) fx, fx′ = FiniteGP(f, x, Σy), FiniteGP(f, x′, Σy′) @test mean(fx) == mean(f, x) @@ -23,8 +23,8 @@ end rng, N, D = MersenneTwister(123456), 10, 2 X, x, Σy = ColVecs(randn(rng, D, N)), randn(rng, N), zeros(N, N) Σy = generate_noise_matrix(rng, N) - fX = FiniteGP(wrap(GP(1, SEKernel()), GPC()), X, Σy) - fx = 
FiniteGP(wrap(GP(1, SEKernel()), GPC()), x, Σy) + fX = FiniteGP(atomic(GP(1, SEKernel()), GPC()), X, Σy) + fx = FiniteGP(atomic(GP(1, SEKernel()), GPC()), x, Σy) # Check that single-GP samples have the correct dimensions. @test length(rand(rng, fX)) == length(X) @@ -36,7 +36,7 @@ end @testset "rand (statistical)" begin rng, N, D, μ0, S = MersenneTwister(123456), 10, 2, 1, 100_000 X, Σy = ColVecs(randn(rng, D, N)), 1e-12 - f = FiniteGP(wrap(GP(1, SEKernel()), GPC()), X, Σy) + f = FiniteGP(atomic(GP(1, SEKernel()), GPC()), X, Σy) # Check mean + covariance estimates approximately converge for single-GP sampling. f̂ = rand(rng, f, S) @@ -54,7 +54,7 @@ end adjoint_test( x->rand( MersenneTwister(123456), - FiniteGP(wrap(GP(sin, SEKernel()), GPC()), x, Σy), + FiniteGP(atomic(GP(sin, SEKernel()), GPC()), x, Σy), ), randn(rng, N), x; @@ -65,7 +65,7 @@ end adjoint_test( x->rand( MersenneTwister(123456), - FiniteGP(wrap(GP(sin, SEKernel()), GPC()), x, Σy), + FiniteGP(atomic(GP(sin, SEKernel()), GPC()), x, Σy), S, ), randn(rng, N, S), @@ -77,7 +77,7 @@ end rng = MersenneTwister(123456) x = randn(rng, T, 123) z = randn(rng, T, 13) - f = wrap(GP(T(0), SEKernel()), GPC()) + f = atomic(GP(T(0), SEKernel()), GPC()) fx = f(x, T(0.1)) u = f(z, T(1e-4)) diff --git a/test/util/abstract_data_set.jl b/test/input_collection_types.jl similarity index 68% rename from test/util/abstract_data_set.jl rename to test/input_collection_types.jl index 4a9c4894..f09a88c7 100644 --- a/test/util/abstract_data_set.jl +++ b/test/input_collection_types.jl @@ -1,9 +1,12 @@ -@timedtestset "abstract_data_set" begin - rng, N, D = MersenneTwister(123456), 10, 2 - x, X = randn(rng, N), randn(rng, D, N) - - # Test BlockDataSet. 
+@testset "input_collection_types" begin @timedtestset "BlockData" begin + + rng = MersenneTwister(123456) + N = 10 + D = 2 + x = randn(rng, N) + X = randn(rng, D, N) + DX = ColVecs(X) DxX = BlockData([x, DX]) @test size(DxX) == (2N,) @@ -32,7 +35,17 @@ @test eltype(BlockData([DX, DX])) <: AbstractVector{Float64} @test eltype(BlockData([eachindex(DX), eachindex(DX)])) == Int - # Convenience constructor. + # Convenience constructors. @test BlockData(x, DX) == DxX + + # Convenience constructors when we have GPPPInputs. + @timedtestset "vcat(::GPPPInput...)" begin + ax = GPPPInput(:a, x) + bx = GPPPInput(:b, DX) + @test vcat(ax, bx) isa BlockData + @test vcat(ax, bx) == vcat(collect(ax), collect(bx)) + + @test eltype(vcat(ax, ax)) == eltype(ax) + end end end diff --git a/test/runtests.jl b/test/runtests.jl index 3794063b..da834aef 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,7 +6,6 @@ using Stheno using Stheno.AbstractGPs using Stheno.BlockArrays using Stheno.KernelFunctions -using Stheno.Zygote # Dependencies that are test-specific. 
using Documenter @@ -16,6 +15,7 @@ using Random using Statistics using Test using TimerOutputs +using Zygote using Stheno: mean, @@ -28,13 +28,7 @@ using Stheno: BlockData, blocks, cross, - ColVecs, - Xt_invA_Y, - Xt_invA_X, - diag_At_A, - diag_At_B, - diag_Xt_invA_X, - diag_Xt_invA_Y + ColVecs using Stheno.AbstractGPs.TestUtils: test_internal_abstractgps_interface using Stheno.AbstractGPs.Distributions: MvNormal @@ -50,39 +44,27 @@ include("test_util.jl") @testset "Stheno" begin - println("util:") - @timedtestset "util" begin - include(joinpath("util", "zygote_rules.jl")) - include(joinpath("util", "covariance_matrices.jl")) - include(joinpath("util", "block_arrays.jl")) - include(joinpath("util", "abstract_data_set.jl")) - end + include("input_collection_types.jl") println("gp:") @timedtestset "gp" begin - include(joinpath("gp", "gp.jl")) - end - - println("composite:") - @timedtestset "composite" begin - include(joinpath("composite", "test_util.jl")) - include(joinpath("composite", "cross.jl")) - include(joinpath("composite", "product.jl")) - include(joinpath("composite", "addition.jl")) - include(joinpath("composite", "compose.jl")) + include(joinpath("gp", "util.jl")) + include(joinpath("gp", "atomic_gp.jl")) + include(joinpath("gp", "derived_gp.jl")) + include(joinpath("gp", "sparse_finite_gp.jl")) end - println("abstract_gp:") - @timedtestset "abstract_gp" begin - include("abstract_gp.jl") + println("affine_transformations:") + @timedtestset "affine_transformations" begin + include(joinpath("affine_transformations", "test_util.jl")) + include(joinpath("affine_transformations", "cross.jl")) + include(joinpath("affine_transformations", "addition.jl")) + include(joinpath("affine_transformations", "compose.jl")) + include(joinpath("affine_transformations", "product.jl")) end - println("gaussian_process_probabilistic_programme:") include("gaussian_process_probabilistic_programme.jl") - println("sparse_finite_gp:") - include("sparse_finite_gp.jl") - 
println("doctests") @timedtestset "doctests" begin DocMeta.setdocmeta!( diff --git a/test/util/block_arrays.jl b/test/util/block_arrays.jl deleted file mode 100644 index d91c3ff6..00000000 --- a/test/util/block_arrays.jl +++ /dev/null @@ -1,32 +0,0 @@ -@timedtestset "dense" begin - @timedtestset "fdm stuff" begin - rng, Ps, Qs = MersenneTwister(123456), [5, 4], [3, 2, 1] - X = mortar([randn(rng, P, Q) for P in Ps, Q in Qs], Ps, Qs) - vec_X, from_vec = FiniteDifferences.to_vec(X) - @test vec_X isa Vector - @test from_vec(vec_X) == X - end - @timedtestset "Stheno._collect ∘ mortar" begin - @timedtestset "BlockVector" begin - - # Generate some blocks. - Ps = [5, 6, 7] - x = BlockArray(randn(sum(Ps)), Ps).blocks - - # Verify the pullback. - ȳ = randn(sum(Ps)) - adjoint_test(Stheno._collect ∘ mortar, ȳ, x) - end - @timedtestset "BlockMatrix" begin - - # Generate some blocks. - Ps = [3, 4, 5] - Qs = [6, 7, 8, 9] - X = BlockArray(randn(sum(Ps), sum(Qs)), Ps, Qs).blocks - Ȳ = randn(sum(Ps), sum(Qs)) - - # Verify pullback. - adjoint_test(Stheno._collect ∘ mortar, Ȳ, X) - end - end -end diff --git a/test/util/covariance_matrices.jl b/test/util/covariance_matrices.jl deleted file mode 100644 index a3a4bc0e..00000000 --- a/test/util/covariance_matrices.jl +++ /dev/null @@ -1,32 +0,0 @@ -@timedtestset "cholesky" begin - # Set up some matrices and factorisations. - rng, N, N′, P, Q = MersenneTwister(123456), 5, 3, 6, 2 - B = randn(rng, N, N) - A_ = B' * B + UniformScaling(1e-6) - A = cholesky(A_) - x, y, z = randn(rng, N), randn(rng, N′), randn(rng, N) - X, Y = randn(rng, N, P), randn(rng, N, Q) - Z = randn(rng, N, P) - A_1_ = exp(randn(rng)) - A_1 = cholesky(A_1_) - - # Specialised matrix operations. 
- @test Xt_invA_X(A, x) isa Real - @test Xt_invA_X(A, x) ≈ x' * (A \ x) - - @test Xt_invA_X(A, X) isa Symmetric - @test Xt_invA_X(A, X) ≈ X' * (A \ X) - - @test Xt_invA_Y(X, A, Y) isa Matrix - @test Xt_invA_Y(X, A, Y) ≈ X' * (A \ Y) - - @test diag_At_A(x) ≈ [x'x] - @test diag_At_A(X) ≈ diag(X'X) - - @test diag_At_B(x, z) ≈ [x'z] - @test diag_At_B(X, Z) ≈ diag(X'Z) - - @test diag_Xt_invA_X(A, X) ≈ diag(Xt_invA_X(A, X)) - - @test diag_Xt_invA_Y(X, A, Z) ≈ diag(Xt_invA_Y(X, A, Z)) -end diff --git a/test/util/zygote_rules.jl b/test/util/zygote_rules.jl deleted file mode 100644 index d3483028..00000000 --- a/test/util/zygote_rules.jl +++ /dev/null @@ -1,24 +0,0 @@ -@timedtestset "zygote_rules" begin - @timedtestset "Diagonal" begin - rng, N = MersenneTwister(123456), 11 - adjoint_test(Diagonal, rand(rng, N, N), randn(rng, N)) - adjoint_test(x->Diagonal(x).diag, randn(rng, N), randn(rng, N)) - end - @timedtestset "broadcast" begin - @timedtestset "exp" begin - rng, N = MersenneTwister(123456), 11 - adjoint_test(x->exp.(x), randn(rng, N), randn(rng, N)) - end - @timedtestset "-" begin - rng, N = MersenneTwister(123456), 11 - adjoint_test(x->.-x, randn(rng, N), randn(rng, N)) - end - end - @timedtestset "ldiv(::Diagonal, ::Matrix)" begin - rng, P, Q = MersenneTwister(123456), 13, 15 - Ȳ = randn(rng, P, Q) - d = randn(rng, P) - X = randn(rng, P, Q) - adjoint_test((d, X)->Diagonal(d) \ X, Ȳ, d, X) - end -end